LLVM 23.0.0git
ScheduleDAGRRList.cpp
Go to the documentation of this file.
1//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements bottom-up and top-down register pressure reduction list
10// schedulers, using standard algorithms. The basic approach uses a priority
11// queue of available nodes to schedule. One at a time, nodes are taken from
12// the priority queue (thus in priority order), checked for legality to
13// schedule, and emitted if legal.
14//
15//===----------------------------------------------------------------------===//
16
17#include "ScheduleDAGSDNodes.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
39#include "llvm/Config/llvm-config.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <cstdlib>
54#include <iterator>
55#include <limits>
56#include <memory>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "pre-RA-sched"
63
64STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65STATISTIC(NumUnfolds, "Number of nodes unfolded");
66STATISTIC(NumDups, "Number of duplicated nodes");
67STATISTIC(NumPRCopies, "Number of physical register copies");
68
71 "Bottom-up register reduction list scheduling",
73
76 "Similar to list-burr but schedules in source "
77 "order when possible",
79
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
85
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
91
93 "disable-sched-cycles", cl::Hidden, cl::init(false),
94 cl::desc("Disable cycle-level precision during preRA scheduling"));
95
96// Temporary sched=list-ilp flags until the heuristics are robust.
97// Some options are also available under sched=list-hybrid.
99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100 cl::desc("Disable regpressure priority in sched=list-ilp"));
102 "disable-sched-live-uses", cl::Hidden, cl::init(true),
103 cl::desc("Disable live use priority in sched=list-ilp"));
105 "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106 cl::desc("Disable virtual register cycle interference checks"));
108 "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109 cl::desc("Disable physreg def-use affinity"));
111 "disable-sched-stalls", cl::Hidden, cl::init(true),
112 cl::desc("Disable no-stall priority in sched=list-ilp"));
114 "disable-sched-critical-path", cl::Hidden, cl::init(false),
115 cl::desc("Disable critical path priority in sched=list-ilp"));
117 "disable-sched-height", cl::Hidden, cl::init(false),
118 cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 "disable-2addr-hack", cl::Hidden, cl::init(true),
121 cl::desc("Disable scheduler's two-address hack"));
122
124 "max-sched-reorder", cl::Hidden, cl::init(6),
125 cl::desc("Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
127
129 AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1),
130 cl::desc("Average inst/cycle when no target itinerary exists."));
131
132namespace {
133
134//===----------------------------------------------------------------------===//
135/// ScheduleDAGRRList - The actual register reduction list scheduler
136/// implementation. This supports both top-down and bottom-up scheduling.
137///
138class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139private:
140 /// NeedLatency - True if the scheduler will make use of latency information.
141 bool NeedLatency;
142
143 /// AvailableQueue - The priority queue to use for the available SUnits.
144 SchedulingPriorityQueue *AvailableQueue;
145
146 /// PendingQueue - This contains all of the instructions whose operands have
147 /// been issued, but their results are not ready yet (due to the latency of
148 /// the operation). Once the operands becomes available, the instruction is
149 /// added to the AvailableQueue.
150 std::vector<SUnit *> PendingQueue;
151
152 /// HazardRec - The hazard recognizer to use.
153 ScheduleHazardRecognizer *HazardRec;
154
155 /// CurCycle - The current scheduler state corresponds to this cycle.
156 unsigned CurCycle = 0;
157
158 /// MinAvailableCycle - Cycle of the soonest available instruction.
159 unsigned MinAvailableCycle = ~0u;
160
161 /// IssueCount - Count instructions issued in this cycle
162 /// Currently valid only for bottom-up scheduling.
163 unsigned IssueCount = 0u;
164
165 /// LiveRegDefs - A set of physical registers and their definition
166 /// that are "live". These nodes must be scheduled before any other nodes that
167 /// modifies the registers can be scheduled.
168 unsigned NumLiveRegs = 0u;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
171
172 // Collect interferences between physical register use/defs.
173 // Each interference is an SUnit and set of physical registers.
174 SmallVector<SUnit*, 4> Interferences;
175
177
178 LRegsMapT LRegsMap;
179
180 /// Topo - A topological ordering for SUnits which permits fast IsReachable
181 /// and similar queries.
183
184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185 // DAG crawling.
186 SmallDenseMap<SUnit *, SUnit *, 16> CallSeqEndForStart;
187
188public:
189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190 SchedulingPriorityQueue *availqueue,
191 CodeGenOptLevel OptLevel)
192 : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
193 AvailableQueue(availqueue), Topo(SUnits, nullptr) {
194 const TargetSubtargetInfo &STI = mf.getSubtarget();
195 if (DisableSchedCycles || !NeedLatency)
196 HazardRec = new ScheduleHazardRecognizer();
197 else
198 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
199 }
200
201 ~ScheduleDAGRRList() override {
202 delete HazardRec;
203 delete AvailableQueue;
204 }
205
206 void Schedule() override;
207
208 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
209
210 /// IsReachable - Checks if SU is reachable from TargetSU.
211 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
212 return Topo.IsReachable(SU, TargetSU);
213 }
214
215 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
216 /// create a cycle.
217 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
218 return Topo.WillCreateCycle(SU, TargetSU);
219 }
220
221 /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU.
222 /// This returns true if this is a new predecessor.
223 /// Does *NOT* update the topological ordering! It just queues an update.
224 void AddPredQueued(SUnit *SU, const SDep &D) {
225 Topo.AddPredQueued(SU, D.getSUnit());
226 SU->addPred(D);
227 }
228
229 /// AddPred - adds a predecessor edge to SUnit SU.
230 /// This returns true if this is a new predecessor.
231 /// Updates the topological ordering if required.
232 void AddPred(SUnit *SU, const SDep &D) {
233 Topo.AddPred(SU, D.getSUnit());
234 SU->addPred(D);
235 }
236
237 /// RemovePred - removes a predecessor edge from SUnit SU.
238 /// This returns true if an edge was removed.
239 /// Updates the topological ordering if required.
240 void RemovePred(SUnit *SU, const SDep &D) {
241 Topo.RemovePred(SU, D.getSUnit());
242 SU->removePred(D);
243 }
244
245private:
246 bool isReady(SUnit *SU) {
247 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
248 AvailableQueue->isReady(SU);
249 }
250
251 void ReleasePred(SUnit *SU, const SDep *PredEdge);
252 void ReleasePredecessors(SUnit *SU);
253 void ReleasePending();
254 void AdvanceToCycle(unsigned NextCycle);
255 void AdvancePastStalls(SUnit *SU);
256 void EmitNode(SUnit *SU);
257 void ScheduleNodeBottomUp(SUnit*);
258 void CapturePred(SDep *PredEdge);
259 void UnscheduleNodeBottomUp(SUnit*);
260 void RestoreHazardCheckerBottomUp();
261 void BacktrackBottomUp(SUnit*, SUnit*);
262 SUnit *TryUnfoldSU(SUnit *);
263 SUnit *CopyAndMoveSuccessors(SUnit*);
264 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
265 const TargetRegisterClass*,
266 const TargetRegisterClass*,
267 SmallVectorImpl<SUnit*>&);
268 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
269
270 void releaseInterferences(unsigned Reg = 0);
271
272 SUnit *PickNodeToScheduleBottomUp();
273 void ListScheduleBottomUp();
274
275 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
276 SUnit *CreateNewSUnit(SDNode *N) {
277 unsigned NumSUnits = SUnits.size();
278 SUnit *NewNode = newSUnit(N);
279 // Update the topological ordering.
280 if (NewNode->NodeNum >= NumSUnits)
281 Topo.AddSUnitWithoutPredecessors(NewNode);
282 return NewNode;
283 }
284
285 /// CreateClone - Creates a new SUnit from an existing one.
286 SUnit *CreateClone(SUnit *N) {
287 unsigned NumSUnits = SUnits.size();
288 SUnit *NewNode = Clone(N);
289 // Update the topological ordering.
290 if (NewNode->NodeNum >= NumSUnits)
291 Topo.AddSUnitWithoutPredecessors(NewNode);
292 return NewNode;
293 }
294
295 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
296 /// need actual latency information but the hybrid scheduler does.
297 bool forceUnitLatencies() const override {
298 return !NeedLatency;
299 }
300};
301
302} // end anonymous namespace
303
304static constexpr unsigned RegSequenceCost = 1;
305
306/// GetCostForDef - Looks up the register class and cost for a given definition.
307/// Typically this just means looking up the representative register class,
308/// but for untyped values (MVT::Untyped) it means inspecting the node's
309/// opcode to determine what register class is being generated.
311 const TargetLowering *TLI,
312 const TargetInstrInfo *TII,
313 const TargetRegisterInfo *TRI,
314 unsigned &RegClass, unsigned &Cost,
315 const MachineFunction &MF) {
316 MVT VT = RegDefPos.GetValue();
317
318 // Special handling for untyped values. These values can only come from
319 // the expansion of custom DAG-to-DAG patterns.
320 if (VT == MVT::Untyped) {
321 const SDNode *Node = RegDefPos.GetNode();
322
323 // Special handling for CopyFromReg of untyped values.
324 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
325 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
327 RegClass = RC->getID();
328 Cost = 1;
329 return;
330 }
331
332 unsigned Opcode = Node->getMachineOpcode();
333 if (Opcode == TargetOpcode::REG_SEQUENCE) {
334 unsigned DstRCIdx = Node->getConstantOperandVal(0);
335 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
336 RegClass = RC->getID();
337 Cost = RegSequenceCost;
338 return;
339 }
340
341 unsigned Idx = RegDefPos.GetIdx();
342 const MCInstrDesc &Desc = TII->get(Opcode);
343 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx);
344 assert(RC && "Not a valid register class");
345 RegClass = RC->getID();
346 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
347 // better way to determine it.
348 Cost = 1;
349 } else {
350 RegClass = TLI->getRepRegClassFor(VT)->getID();
351 Cost = TLI->getRepRegClassCostFor(VT);
352 }
353}
354
355/// Schedule - Schedule the DAG using list scheduling.
356void ScheduleDAGRRList::Schedule() {
357 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
358 << " '" << BB->getName() << "' **********\n");
359
360 CurCycle = 0;
361 IssueCount = 0;
362 MinAvailableCycle =
363 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
364 NumLiveRegs = 0;
365 // Allocate slots for each physical register, plus one for a special register
366 // to track the virtual resource of a calling sequence.
367 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
368 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
369 CallSeqEndForStart.clear();
370 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
371
372 // Build the scheduling graph.
373 BuildSchedGraph();
374
375 LLVM_DEBUG(dump());
376 Topo.MarkDirty();
377
378 AvailableQueue->initNodes(SUnits);
379
380 HazardRec->Reset();
381
382 // Execute the actual scheduling loop.
383 ListScheduleBottomUp();
384
385 AvailableQueue->releaseState();
386
387 LLVM_DEBUG({
388 dbgs() << "*** Final schedule ***\n";
389 dumpSchedule();
390 dbgs() << '\n';
391 });
392}
393
394//===----------------------------------------------------------------------===//
395// Bottom-Up Scheduling
396//===----------------------------------------------------------------------===//
397
398/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
399/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
400void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
401 SUnit *PredSU = PredEdge->getSUnit();
402
403#ifndef NDEBUG
404 if (PredSU->NumSuccsLeft == 0) {
405 dbgs() << "*** Scheduling failed! ***\n";
406 dumpNode(*PredSU);
407 dbgs() << " has been released too many times!\n";
408 llvm_unreachable(nullptr);
409 }
410#endif
411 --PredSU->NumSuccsLeft;
412
413 if (!forceUnitLatencies()) {
414 // Updating predecessor's height. This is now the cycle when the
415 // predecessor can be scheduled without causing a pipeline stall.
416 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
417 }
418
419 // If all the node's successors are scheduled, this node is ready
420 // to be scheduled. Ignore the special EntrySU node.
421 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
422 PredSU->isAvailable = true;
423
424 unsigned Height = PredSU->getHeight();
425 if (Height < MinAvailableCycle)
426 MinAvailableCycle = Height;
427
428 if (isReady(PredSU)) {
429 AvailableQueue->push(PredSU);
430 }
431 // CapturePred and others may have left the node in the pending queue, avoid
432 // adding it twice.
433 else if (!PredSU->isPending) {
434 PredSU->isPending = true;
435 PendingQueue.push_back(PredSU);
436 }
437 }
438}
439
440/// IsChainDependent - Test if Outer is reachable from Inner through
441/// chain dependencies.
442static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
443 unsigned NestLevel,
444 const TargetInstrInfo *TII) {
445 SDNode *N = Outer;
446 while (true) {
447 if (N == Inner)
448 return true;
449 // For a TokenFactor, examine each operand. There may be multiple ways
450 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
451 // most nesting in order to ensure that we find the corresponding match.
452 if (N->getOpcode() == ISD::TokenFactor) {
453 for (const SDValue &Op : N->op_values())
454 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
455 return true;
456 return false;
457 }
458 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
459 if (N->isMachineOpcode()) {
460 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
461 ++NestLevel;
462 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
463 if (NestLevel == 0)
464 return false;
465 --NestLevel;
466 }
467 }
468 // Otherwise, find the chain and continue climbing.
469 for (const SDValue &Op : N->op_values())
470 if (Op.getValueType() == MVT::Other) {
471 N = Op.getNode();
472 goto found_chain_operand;
473 }
474 return false;
475 found_chain_operand:;
476 if (N->getOpcode() == ISD::EntryToken)
477 return false;
478 }
479}
480
481/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
482/// the corresponding (lowered) CALLSEQ_BEGIN node.
483///
484/// NestLevel and MaxNested are used in recursion to indcate the current level
485/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
486/// level seen so far.
487///
488/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
489/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
490static SDNode *
491FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
492 const TargetInstrInfo *TII) {
493 while (true) {
494 // For a TokenFactor, examine each operand. There may be multiple ways
495 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
496 // most nesting in order to ensure that we find the corresponding match.
497 if (N->getOpcode() == ISD::TokenFactor) {
498 SDNode *Best = nullptr;
499 unsigned BestMaxNest = MaxNest;
500 for (const SDValue &Op : N->op_values()) {
501 unsigned MyNestLevel = NestLevel;
502 unsigned MyMaxNest = MaxNest;
503 if (SDNode *New = FindCallSeqStart(Op.getNode(),
504 MyNestLevel, MyMaxNest, TII))
505 if (!Best || (MyMaxNest > BestMaxNest)) {
506 Best = New;
507 BestMaxNest = MyMaxNest;
508 }
509 }
510 assert(Best);
511 MaxNest = BestMaxNest;
512 return Best;
513 }
514 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
515 if (N->isMachineOpcode()) {
516 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
517 ++NestLevel;
518 MaxNest = std::max(MaxNest, NestLevel);
519 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
520 assert(NestLevel != 0);
521 --NestLevel;
522 if (NestLevel == 0)
523 return N;
524 }
525 }
526 // Otherwise, find the chain and continue climbing.
527 for (const SDValue &Op : N->op_values())
528 if (Op.getValueType() == MVT::Other) {
529 N = Op.getNode();
530 goto found_chain_operand;
531 }
532 return nullptr;
533 found_chain_operand:;
534 if (N->getOpcode() == ISD::EntryToken)
535 return nullptr;
536 }
537}
538
539/// Call ReleasePred for each predecessor, then update register live def/gen.
540/// Always update LiveRegDefs for a register dependence even if the current SU
541/// also defines the register. This effectively create one large live range
542/// across a sequence of two-address node. This is important because the
543/// entire chain must be scheduled together. Example:
544///
545/// flags = (3) add
546/// flags = (2) addc flags
547/// flags = (1) addc flags
548///
549/// results in
550///
551/// LiveRegDefs[flags] = 3
552/// LiveRegGens[flags] = 1
553///
554/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
555/// interference on flags.
556void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
557 // Bottom up: release predecessors
558 for (SDep &Pred : SU->Preds) {
559 ReleasePred(SU, &Pred);
560 if (Pred.isAssignedRegDep()) {
561 // This is a physical register dependency and it's impossible or
562 // expensive to copy the register. Make sure nothing that can
563 // clobber the register is scheduled between the predecessor and
564 // this node.
565 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
566 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
567 "interference on register dependence");
568 LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
569 if (!LiveRegGens[Pred.getReg()]) {
570 ++NumLiveRegs;
571 LiveRegGens[Pred.getReg()] = SU;
572 }
573 }
574 }
575
576 // If we're scheduling a lowered CALLSEQ_END, find the corresponding
577 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
578 // these nodes, to prevent other calls from being interscheduled with them.
579 unsigned CallResource = TRI->getNumRegs();
580 if (!LiveRegDefs[CallResource])
581 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
582 if (Node->isMachineOpcode() &&
583 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
584 unsigned NestLevel = 0;
585 unsigned MaxNest = 0;
586 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
587 assert(N && "Must find call sequence start");
588
589 SUnit *Def = &SUnits[N->getNodeId()];
590 CallSeqEndForStart[Def] = SU;
591
592 ++NumLiveRegs;
593 LiveRegDefs[CallResource] = Def;
594 LiveRegGens[CallResource] = SU;
595 break;
596 }
597}
598
599/// Check to see if any of the pending instructions are ready to issue. If
600/// so, add them to the available queue.
601void ScheduleDAGRRList::ReleasePending() {
602 if (DisableSchedCycles) {
603 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
604 return;
605 }
606
607 // If the available queue is empty, it is safe to reset MinAvailableCycle.
608 if (AvailableQueue->empty())
609 MinAvailableCycle = std::numeric_limits<unsigned>::max();
610
611 // Check to see if any of the pending instructions are ready to issue. If
612 // so, add them to the available queue.
613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
614 unsigned ReadyCycle = PendingQueue[i]->getHeight();
615 if (ReadyCycle < MinAvailableCycle)
616 MinAvailableCycle = ReadyCycle;
617
618 if (PendingQueue[i]->isAvailable) {
619 if (!isReady(PendingQueue[i]))
620 continue;
621 AvailableQueue->push(PendingQueue[i]);
622 }
623 PendingQueue[i]->isPending = false;
624 PendingQueue[i] = PendingQueue.back();
625 PendingQueue.pop_back();
626 --i; --e;
627 }
628}
629
630/// Move the scheduler state forward by the specified number of Cycles.
631void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
632 if (NextCycle <= CurCycle)
633 return;
634
635 IssueCount = 0;
636 AvailableQueue->setCurCycle(NextCycle);
637 if (!HazardRec->isEnabled()) {
638 // Bypass lots of virtual calls in case of long latency.
639 CurCycle = NextCycle;
640 }
641 else {
642 for (; CurCycle != NextCycle; ++CurCycle) {
643 HazardRec->RecedeCycle();
644 }
645 }
646 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
647 // available Q to release pending nodes at least once before popping.
648 ReleasePending();
649}
650
651/// Move the scheduler state forward until the specified node's dependents are
652/// ready and can be scheduled with no resource conflicts.
653void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
655 return;
656
657 // FIXME: Nodes such as CopyFromReg probably should not advance the current
658 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
659 // has predecessors the cycle will be advanced when they are scheduled.
660 // But given the crude nature of modeling latency though such nodes, we
661 // currently need to treat these nodes like real instructions.
662 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
663
664 unsigned ReadyCycle = SU->getHeight();
665
666 // Bump CurCycle to account for latency. We assume the latency of other
667 // available instructions may be hidden by the stall (not a full pipe stall).
668 // This updates the hazard recognizer's cycle before reserving resources for
669 // this instruction.
670 AdvanceToCycle(ReadyCycle);
671
672 // Calls are scheduled in their preceding cycle, so don't conflict with
673 // hazards from instructions after the call. EmitNode will reset the
674 // scoreboard state before emitting the call.
675 if (SU->isCall)
676 return;
677
678 // FIXME: For resource conflicts in very long non-pipelined stages, we
679 // should probably skip ahead here to avoid useless scoreboard checks.
680 int Stalls = 0;
681 while (true) {
683 HazardRec->getHazardType(SU, -Stalls);
684
686 break;
687
688 ++Stalls;
689 }
690 AdvanceToCycle(CurCycle + Stalls);
691}
692
693/// Record this SUnit in the HazardRecognizer.
694/// Does not update CurCycle.
695void ScheduleDAGRRList::EmitNode(SUnit *SU) {
696 if (!HazardRec->isEnabled())
697 return;
698
699 // Check for phys reg copy.
700 if (!SU->getNode())
701 return;
702
703 switch (SU->getNode()->getOpcode()) {
704 default:
705 assert(SU->getNode()->isMachineOpcode() &&
706 "This target-independent node should not be scheduled.");
707 break;
709 case ISD::TokenFactor:
712 case ISD::CopyToReg:
713 case ISD::CopyFromReg:
714 case ISD::EH_LABEL:
716 // Noops don't affect the scoreboard state. Copies are likely to be
717 // removed.
718 return;
719 case ISD::INLINEASM:
721 // For inline asm, clear the pipeline state.
722 HazardRec->Reset();
723 return;
724 }
725 if (SU->isCall) {
726 // Calls are scheduled with their preceding instructions. For bottom-up
727 // scheduling, clear the pipeline state before emitting.
728 HazardRec->Reset();
729 }
730
731 HazardRec->EmitInstruction(SU);
732}
733
734static void resetVRegCycle(SUnit *SU);
735
736/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
737/// count of its predecessors. If a predecessor pending count is zero, add it to
738/// the Available queue.
739void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
740 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
741 LLVM_DEBUG(dumpNode(*SU));
742
743#ifndef NDEBUG
744 if (CurCycle < SU->getHeight())
745 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
746 << "] pipeline stall!\n");
747#endif
748
749 // FIXME: Do not modify node height. It may interfere with
750 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
751 // node its ready cycle can aid heuristics, and after scheduling it can
752 // indicate the scheduled cycle.
753 SU->setHeightToAtLeast(CurCycle);
754
755 // Reserve resources for the scheduled instruction.
756 EmitNode(SU);
757
758 Sequence.push_back(SU);
759
760 AvailableQueue->scheduledNode(SU);
761
762 // If HazardRec is disabled, and each inst counts as one cycle, then
763 // advance CurCycle before ReleasePredecessors to avoid useless pushes to
764 // PendingQueue for schedulers that implement HasReadyFilter.
765 if (!HazardRec->isEnabled() && AvgIPC < 2)
766 AdvanceToCycle(CurCycle + 1);
767
768 // Update liveness of predecessors before successors to avoid treating a
769 // two-address node as a live range def.
770 ReleasePredecessors(SU);
771
772 // Release all the implicit physical register defs that are live.
773 for (SDep &Succ : SU->Succs) {
774 // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
775 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
776 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
777 --NumLiveRegs;
778 LiveRegDefs[Succ.getReg()] = nullptr;
779 LiveRegGens[Succ.getReg()] = nullptr;
780 releaseInterferences(Succ.getReg());
781 }
782 }
783 // Release the special call resource dependence, if this is the beginning
784 // of a call.
785 unsigned CallResource = TRI->getNumRegs();
786 if (LiveRegDefs[CallResource] == SU)
787 for (const SDNode *SUNode = SU->getNode(); SUNode;
788 SUNode = SUNode->getGluedNode()) {
789 if (SUNode->isMachineOpcode() &&
790 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
791 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
792 --NumLiveRegs;
793 LiveRegDefs[CallResource] = nullptr;
794 LiveRegGens[CallResource] = nullptr;
795 releaseInterferences(CallResource);
796 }
797 }
798
799 resetVRegCycle(SU);
800
801 SU->isScheduled = true;
802
803 // Conditions under which the scheduler should eagerly advance the cycle:
804 // (1) No available instructions
805 // (2) All pipelines full, so available instructions must have hazards.
806 //
807 // If HazardRec is disabled, the cycle was pre-advanced before calling
808 // ReleasePredecessors. In that case, IssueCount should remain 0.
809 //
810 // Check AvailableQueue after ReleasePredecessors in case of zero latency.
811 if (HazardRec->isEnabled() || AvgIPC > 1) {
812 if (SU->getNode() && SU->getNode()->isMachineOpcode())
813 ++IssueCount;
814 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
815 || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
816 AdvanceToCycle(CurCycle + 1);
817 }
818}
819
820/// CapturePred - This does the opposite of ReleasePred. Since SU is being
821/// unscheduled, increase the succ left count of its predecessors. Remove
822/// them from AvailableQueue if necessary.
823void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
824 SUnit *PredSU = PredEdge->getSUnit();
825 if (PredSU->isAvailable) {
826 PredSU->isAvailable = false;
827 if (!PredSU->isPending)
828 AvailableQueue->remove(PredSU);
829 }
830
831 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
832 "NumSuccsLeft will overflow!");
833 ++PredSU->NumSuccsLeft;
834}
835
836/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
837/// its predecessor states to reflect the change.
838void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
839 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
840 LLVM_DEBUG(dumpNode(*SU));
841
842 for (SDep &Pred : SU->Preds) {
843 CapturePred(&Pred);
844 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
845 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
846 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
847 "Physical register dependency violated?");
848 --NumLiveRegs;
849 LiveRegDefs[Pred.getReg()] = nullptr;
850 LiveRegGens[Pred.getReg()] = nullptr;
851 releaseInterferences(Pred.getReg());
852 }
853 }
854
855 // Reclaim the special call resource dependence, if this is the beginning
856 // of a call.
857 unsigned CallResource = TRI->getNumRegs();
858 for (const SDNode *SUNode = SU->getNode(); SUNode;
859 SUNode = SUNode->getGluedNode()) {
860 if (SUNode->isMachineOpcode() &&
861 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
862 SUnit *SeqEnd = CallSeqEndForStart[SU];
863 assert(SeqEnd && "Call sequence start/end must be known");
864 assert(!LiveRegDefs[CallResource]);
865 assert(!LiveRegGens[CallResource]);
866 ++NumLiveRegs;
867 LiveRegDefs[CallResource] = SU;
868 LiveRegGens[CallResource] = SeqEnd;
869 }
870 }
871
872 // Release the special call resource dependence, if this is the end
873 // of a call.
874 if (LiveRegGens[CallResource] == SU)
875 for (const SDNode *SUNode = SU->getNode(); SUNode;
876 SUNode = SUNode->getGluedNode()) {
877 if (SUNode->isMachineOpcode() &&
878 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
879 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
880 assert(LiveRegDefs[CallResource]);
881 assert(LiveRegGens[CallResource]);
882 --NumLiveRegs;
883 LiveRegDefs[CallResource] = nullptr;
884 LiveRegGens[CallResource] = nullptr;
885 releaseInterferences(CallResource);
886 }
887 }
888
889 for (auto &Succ : SU->Succs) {
890 if (Succ.isAssignedRegDep()) {
891 auto Reg = Succ.getReg();
892 if (!LiveRegDefs[Reg])
893 ++NumLiveRegs;
894 // This becomes the nearest def. Note that an earlier def may still be
895 // pending if this is a two-address node.
896 LiveRegDefs[Reg] = SU;
897
898 // Update LiveRegGen only if was empty before this unscheduling.
899 // This is to avoid incorrect updating LiveRegGen set in previous run.
900 if (!LiveRegGens[Reg]) {
901 // Find the successor with the lowest height.
902 LiveRegGens[Reg] = Succ.getSUnit();
903 for (auto &Succ2 : SU->Succs) {
904 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
905 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
906 LiveRegGens[Reg] = Succ2.getSUnit();
907 }
908 }
909 }
910 }
911 if (SU->getHeight() < MinAvailableCycle)
912 MinAvailableCycle = SU->getHeight();
913
914 SU->setHeightDirty();
915 SU->isScheduled = false;
916 SU->isAvailable = true;
917 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
918 // Don't make available until backtracking is complete.
919 SU->isPending = true;
920 PendingQueue.push_back(SU);
921 }
922 else {
923 AvailableQueue->push(SU);
924 }
925 AvailableQueue->unscheduledNode(SU);
926}
927
928/// After backtracking, the hazard checker needs to be restored to a state
929/// corresponding the current cycle.
930void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
931 HazardRec->Reset();
932
933 unsigned LookAhead = std::min((unsigned)Sequence.size(),
934 HazardRec->getMaxLookAhead());
935 if (LookAhead == 0)
936 return;
937
938 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
939 unsigned HazardCycle = (*I)->getHeight();
940 for (auto E = Sequence.end(); I != E; ++I) {
941 SUnit *SU = *I;
942 for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
943 HazardRec->RecedeCycle();
944 }
945 EmitNode(SU);
946 }
947}
948
949/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
950/// BTCycle in order to schedule a specific node.
951void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
952 SUnit *OldSU = Sequence.back();
953 while (true) {
954 Sequence.pop_back();
955 // FIXME: use ready cycle instead of height
956 CurCycle = OldSU->getHeight();
957 UnscheduleNodeBottomUp(OldSU);
958 AvailableQueue->setCurCycle(CurCycle);
959 if (OldSU == BtSU)
960 break;
961 OldSU = Sequence.back();
962 }
963
964 assert(!SU->isSucc(OldSU) && "Something is wrong!");
965
966 RestoreHazardCheckerBottomUp();
967
968 ReleasePending();
969
970 ++NumBacktracks;
971}
972
973static bool isOperandOf(const SUnit *SU, SDNode *N) {
974 for (const SDNode *SUNode = SU->getNode(); SUNode;
975 SUNode = SUNode->getGluedNode()) {
976 if (SUNode->isOperandOf(N))
977 return true;
978 }
979 return false;
980}
981
982/// TryUnfold - Attempt to unfold
983SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
984 SDNode *N = SU->getNode();
985 // Use while over if to ease fall through.
987 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
988 return nullptr;
989
990 assert(NewNodes.size() == 2 && "Expected a load folding node!");
991
992 N = NewNodes[1];
993 SDNode *LoadNode = NewNodes[0];
994 unsigned NumVals = N->getNumValues();
995 unsigned OldNumVals = SU->getNode()->getNumValues();
996
997 // LoadNode may already exist. This can happen when there is another
998 // load from the same location and producing the same type of value
999 // but it has different alignment or volatileness.
1000 bool isNewLoad = true;
1001 SUnit *LoadSU;
1002 if (LoadNode->getNodeId() != -1) {
1003 LoadSU = &SUnits[LoadNode->getNodeId()];
1004 // If LoadSU has already been scheduled, we should clone it but
1005 // this would negate the benefit to unfolding so just return SU.
1006 if (LoadSU->isScheduled)
1007 return SU;
1008 isNewLoad = false;
1009 } else {
1010 LoadSU = CreateNewSUnit(LoadNode);
1011 LoadNode->setNodeId(LoadSU->NodeNum);
1012
1013 InitNumRegDefsLeft(LoadSU);
1014 computeLatency(LoadSU);
1015 }
1016
1017 bool isNewN = true;
1018 SUnit *NewSU;
1019 // This can only happen when isNewLoad is false.
1020 if (N->getNodeId() != -1) {
1021 NewSU = &SUnits[N->getNodeId()];
1022 // If NewSU has already been scheduled, we need to clone it, but this
1023 // negates the benefit to unfolding so just return SU.
1024 if (NewSU->isScheduled) {
1025 return SU;
1026 }
1027 isNewN = false;
1028 } else {
1029 NewSU = CreateNewSUnit(N);
1030 N->setNodeId(NewSU->NodeNum);
1031
1032 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1033 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1034 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1035 NewSU->isTwoAddress = true;
1036 break;
1037 }
1038 }
1039 if (MCID.isCommutable())
1040 NewSU->isCommutable = true;
1041
1042 InitNumRegDefsLeft(NewSU);
1043 computeLatency(NewSU);
1044 }
1045
1046 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1047
1048 // Now that we are committed to unfolding replace DAG Uses.
1049 for (unsigned i = 0; i != NumVals; ++i)
1050 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1051 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1052 SDValue(LoadNode, 1));
1053
1054 // Record all the edges to and from the old SU, by category.
1055 SmallVector<SDep, 4> ChainPreds;
1056 SmallVector<SDep, 4> ChainSuccs;
1057 SmallVector<SDep, 4> LoadPreds;
1058 SmallVector<SDep, 4> NodePreds;
1059 SmallVector<SDep, 4> NodeSuccs;
1060 for (SDep &Pred : SU->Preds) {
1061 if (Pred.isCtrl())
1062 ChainPreds.push_back(Pred);
1063 else if (isOperandOf(Pred.getSUnit(), LoadNode))
1064 LoadPreds.push_back(Pred);
1065 else
1066 NodePreds.push_back(Pred);
1067 }
1068 for (SDep &Succ : SU->Succs) {
1069 if (Succ.isCtrl())
1070 ChainSuccs.push_back(Succ);
1071 else
1072 NodeSuccs.push_back(Succ);
1073 }
1074
1075 // Now assign edges to the newly-created nodes.
1076 for (const SDep &Pred : ChainPreds) {
1077 RemovePred(SU, Pred);
1078 if (isNewLoad)
1079 AddPredQueued(LoadSU, Pred);
1080 }
1081 for (const SDep &Pred : LoadPreds) {
1082 RemovePred(SU, Pred);
1083 if (isNewLoad)
1084 AddPredQueued(LoadSU, Pred);
1085 }
1086 for (const SDep &Pred : NodePreds) {
1087 RemovePred(SU, Pred);
1088 AddPredQueued(NewSU, Pred);
1089 }
1090 for (SDep &D : NodeSuccs) {
1091 SUnit *SuccDep = D.getSUnit();
1092 D.setSUnit(SU);
1093 RemovePred(SuccDep, D);
1094 D.setSUnit(NewSU);
1095 AddPredQueued(SuccDep, D);
1096 // Balance register pressure.
1097 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1098 !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1099 --NewSU->NumRegDefsLeft;
1100 }
1101 for (SDep &D : ChainSuccs) {
1102 SUnit *SuccDep = D.getSUnit();
1103 D.setSUnit(SU);
1104 RemovePred(SuccDep, D);
1105 if (isNewLoad) {
1106 D.setSUnit(LoadSU);
1107 AddPredQueued(SuccDep, D);
1108 }
1109 }
1110
1111 // Add a data dependency to reflect that NewSU reads the value defined
1112 // by LoadSU.
1113 SDep D(LoadSU, SDep::Data, 0);
1114 D.setLatency(LoadSU->Latency);
1115 AddPredQueued(NewSU, D);
1116
1117 if (isNewLoad)
1118 AvailableQueue->addNode(LoadSU);
1119 if (isNewN)
1120 AvailableQueue->addNode(NewSU);
1121
1122 ++NumUnfolds;
1123
1124 if (NewSU->NumSuccsLeft == 0)
1125 NewSU->isAvailable = true;
1126
1127 return NewSU;
1128}
1129
1130/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1131/// successors to the newly created node.
1132SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1133 SDNode *N = SU->getNode();
1134 if (!N)
1135 return nullptr;
1136
1137 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1138 LLVM_DEBUG(dumpNode(*SU));
1139
1140 if (N->getGluedNode() &&
1141 !TII->canCopyGluedNodeDuringSchedule(N)) {
1142 LLVM_DEBUG(
1143 dbgs()
1144 << "Giving up because it has incoming glue and the target does not "
1145 "want to copy it\n");
1146 return nullptr;
1147 }
1148
1149 SUnit *NewSU;
1150 bool TryUnfold = false;
1151 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1152 MVT VT = N->getSimpleValueType(i);
1153 if (VT == MVT::Glue) {
1154 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1155 return nullptr;
1156 } else if (VT == MVT::Other)
1157 TryUnfold = true;
1158 }
1159 for (const SDValue &Op : N->op_values()) {
1160 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1161 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1162 LLVM_DEBUG(
1163 dbgs() << "Giving up because it one of the operands is glue and "
1164 "the target does not want to copy it\n");
1165 return nullptr;
1166 }
1167 }
1168
1169 // If possible unfold instruction.
1170 if (TryUnfold) {
1171 SUnit *UnfoldSU = TryUnfoldSU(SU);
1172 if (!UnfoldSU)
1173 return nullptr;
1174 SU = UnfoldSU;
1175 N = SU->getNode();
1176 // If this can be scheduled don't bother duplicating and just return
1177 if (SU->NumSuccsLeft == 0)
1178 return SU;
1179 }
1180
1181 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1182 NewSU = CreateClone(SU);
1183
1184 // New SUnit has the exact same predecessors.
1185 for (SDep &Pred : SU->Preds)
1186 if (!Pred.isArtificial())
1187 AddPredQueued(NewSU, Pred);
1188
1189 // Make sure the clone comes after the original. (InstrEmitter assumes
1190 // this ordering.)
1191 AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1192
1193 // Only copy scheduled successors. Cut them from old node's successor
1194 // list and move them over.
1196 for (SDep &Succ : SU->Succs) {
1197 if (Succ.isArtificial())
1198 continue;
1199 SUnit *SuccSU = Succ.getSUnit();
1200 if (SuccSU->isScheduled) {
1201 SDep D = Succ;
1202 D.setSUnit(NewSU);
1203 AddPredQueued(SuccSU, D);
1204 D.setSUnit(SU);
1205 DelDeps.emplace_back(SuccSU, D);
1206 }
1207 }
1208 for (const auto &[DelSU, DelD] : DelDeps)
1209 RemovePred(DelSU, DelD);
1210
1211 AvailableQueue->updateNode(SU);
1212 AvailableQueue->addNode(NewSU);
1213
1214 ++NumDups;
1215 return NewSU;
1216}
1217
1218/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1219/// scheduled successors of the given SUnit to the last copy.
1220void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1221 const TargetRegisterClass *DestRC,
1222 const TargetRegisterClass *SrcRC,
1223 SmallVectorImpl<SUnit*> &Copies) {
1224 SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1225 CopyFromSU->CopySrcRC = SrcRC;
1226 CopyFromSU->CopyDstRC = DestRC;
1227
1228 SUnit *CopyToSU = CreateNewSUnit(nullptr);
1229 CopyToSU->CopySrcRC = DestRC;
1230 CopyToSU->CopyDstRC = SrcRC;
1231
1232 // Only copy scheduled successors. Cut them from old node's successor
1233 // list and move them over.
1235 for (SDep &Succ : SU->Succs) {
1236 if (Succ.isArtificial())
1237 continue;
1238 SUnit *SuccSU = Succ.getSUnit();
1239 if (SuccSU->isScheduled) {
1240 SDep D = Succ;
1241 D.setSUnit(CopyToSU);
1242 AddPredQueued(SuccSU, D);
1243 DelDeps.emplace_back(SuccSU, Succ);
1244 }
1245 else {
1246 // Avoid scheduling the def-side copy before other successors. Otherwise,
1247 // we could introduce another physreg interference on the copy and
1248 // continue inserting copies indefinitely.
1249 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1250 }
1251 }
1252 for (const auto &[DelSU, DelD] : DelDeps)
1253 RemovePred(DelSU, DelD);
1254
1255 SDep FromDep(SU, SDep::Data, Reg);
1256 FromDep.setLatency(SU->Latency);
1257 AddPredQueued(CopyFromSU, FromDep);
1258 SDep ToDep(CopyFromSU, SDep::Data, 0);
1259 ToDep.setLatency(CopyFromSU->Latency);
1260 AddPredQueued(CopyToSU, ToDep);
1261
1262 AvailableQueue->updateNode(SU);
1263 AvailableQueue->addNode(CopyFromSU);
1264 AvailableQueue->addNode(CopyToSU);
1265 Copies.push_back(CopyFromSU);
1266 Copies.push_back(CopyToSU);
1267
1268 ++NumPRCopies;
1269}
1270
1271/// CheckForLiveRegDef - Return true and update live register vector if the
1272/// specified register def of the specified SUnit clobbers any "live" registers.
1273static void CheckForLiveRegDef(SUnit *SU, MCRegister Reg, SUnit **LiveRegDefs,
1274 SmallSet<unsigned, 4> &RegAdded,
1276 const TargetRegisterInfo *TRI,
1277 const SDNode *Node = nullptr) {
1278 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1279
1280 // Check if Ref is live.
1281 if (!LiveRegDefs[*AliasI]) continue;
1282
1283 // Allow multiple uses of the same def.
1284 if (LiveRegDefs[*AliasI] == SU) continue;
1285
1286 // Allow multiple uses of same def
1287 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1288 continue;
1289
1290 // Add Reg to the set of interfering live regs.
1291 if (RegAdded.insert(*AliasI).second) {
1292 LRegs.push_back(*AliasI);
1293 }
1294 }
1295}
1296
1297/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1298/// by RegMask, and add them to LRegs.
1299static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1300 ArrayRef<SUnit*> LiveRegDefs,
1301 SmallSet<unsigned, 4> &RegAdded,
1303 // Look at all live registers. Skip Reg0 and the special CallResource.
1304 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1305 if (!LiveRegDefs[i]) continue;
1306 if (LiveRegDefs[i] == SU) continue;
1307 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1308 if (RegAdded.insert(i).second)
1309 LRegs.push_back(i);
1310 }
1311}
1312
1313/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1314static const uint32_t *getNodeRegMask(const SDNode *N) {
1315 for (const SDValue &Op : N->op_values())
1316 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1317 return RegOp->getRegMask();
1318 return nullptr;
1319}
1320
1321/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1322/// scheduling of the given node to satisfy live physical register dependencies.
1323/// If the specific node is the last one that's available to schedule, do
1324/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1325bool ScheduleDAGRRList::
1326DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1327 if (NumLiveRegs == 0)
1328 return false;
1329
1330 SmallSet<unsigned, 4> RegAdded;
1331 // If this node would clobber any "live" register, then it's not ready.
1332 //
1333 // If SU is the currently live definition of the same register that it uses,
1334 // then we are free to schedule it.
1335 for (SDep &Pred : SU->Preds) {
1336 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1337 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1338 RegAdded, LRegs, TRI);
1339 }
1340
1341 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1342 if (Node->getOpcode() == ISD::INLINEASM ||
1343 Node->getOpcode() == ISD::INLINEASM_BR) {
1344 // Inline asm can clobber physical defs.
1345 unsigned NumOps = Node->getNumOperands();
1346 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1347 --NumOps; // Ignore the glue operand.
1348
1349 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1350 unsigned Flags = Node->getConstantOperandVal(i);
1351 const InlineAsm::Flag F(Flags);
1352 unsigned NumVals = F.getNumOperandRegisters();
1353
1354 ++i; // Skip the ID value.
1355 if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
1356 F.isClobberKind()) {
1357 // Check for def of register or earlyclobber register.
1358 for (; NumVals; --NumVals, ++i) {
1359 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1360 if (Reg.isPhysical())
1361 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1362 }
1363 } else
1364 i += NumVals;
1365 }
1366 continue;
1367 }
1368
1369 if (Node->getOpcode() == ISD::CopyToReg) {
1370 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
1371 if (Reg.isPhysical()) {
1372 SDNode *SrcNode = Node->getOperand(2).getNode();
1373 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
1374 SrcNode);
1375 }
1376 }
1377
1378 if (!Node->isMachineOpcode())
1379 continue;
1380 // If we're in the middle of scheduling a call, don't begin scheduling
1381 // another call. Also, don't allow any physical registers to be live across
1382 // the call.
1383 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1384 // Check the special calling-sequence resource.
1385 unsigned CallResource = TRI->getNumRegs();
1386 if (LiveRegDefs[CallResource]) {
1387 SDNode *Gen = LiveRegGens[CallResource]->getNode();
1388 while (SDNode *Glued = Gen->getGluedNode())
1389 Gen = Glued;
1390 if (!IsChainDependent(Gen, Node, 0, TII) &&
1391 RegAdded.insert(CallResource).second)
1392 LRegs.push_back(CallResource);
1393 }
1394 }
1395 if (const uint32_t *RegMask = getNodeRegMask(Node))
1396 CheckForLiveRegDefMasked(SU, RegMask,
1397 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1398 RegAdded, LRegs);
1399
1400 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1401 if (MCID.hasOptionalDef()) {
1402 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1403 // This operand can be either a def of CPSR, if the S bit is set; or a use
1404 // of %noreg. When the OptionalDef is set to a valid register, we need to
1405 // handle it in the same way as an ImplicitDef.
1406 for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1407 if (MCID.operands()[i].isOptionalDef()) {
1408 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1409 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1410 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1411 }
1412 }
1413 for (MCPhysReg Reg : MCID.implicit_defs())
1414 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1415 }
1416
1417 return !LRegs.empty();
1418}
1419
1420void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1421 // Add the nodes that aren't ready back onto the available list.
1422 for (unsigned i = Interferences.size(); i > 0; --i) {
1423 SUnit *SU = Interferences[i-1];
1424 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1425 if (Reg) {
1426 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1427 if (!is_contained(LRegs, Reg))
1428 continue;
1429 }
1430 SU->isPending = false;
1431 // The interfering node may no longer be available due to backtracking.
1432 // Furthermore, it may have been made available again, in which case it is
1433 // now already in the AvailableQueue.
1434 if (SU->isAvailable && !SU->NodeQueueId) {
1435 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1436 AvailableQueue->push(SU);
1437 }
1438 if (i < Interferences.size())
1439 Interferences[i-1] = Interferences.back();
1440 Interferences.pop_back();
1441 LRegsMap.erase(LRegsPos);
1442 }
1443}
1444
1445/// Return a node that can be scheduled in this cycle. Requirements:
1446/// (1) Ready: latency has been satisfied
1447/// (2) No Hazards: resources are available
1448/// (3) No Interferences: may unschedule to break register interferences.
1449SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1450 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1451 auto FindAvailableNode = [&]() {
1452 while (CurSU) {
1453 SmallVector<unsigned, 4> LRegs;
1454 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1455 break;
1456 LLVM_DEBUG(dbgs() << " Interfering reg ";
1457 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1458 else dbgs() << printReg(LRegs[0], TRI);
1459 dbgs() << " SU #" << CurSU->NodeNum << '\n');
1460 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1461 if (LRegsInserted) {
1462 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1463 Interferences.push_back(CurSU);
1464 }
1465 else {
1466 assert(CurSU->isPending && "Interferences are pending");
1467 // Update the interference with current live regs.
1468 LRegsIter->second = LRegs;
1469 }
1470 CurSU = AvailableQueue->pop();
1471 }
1472 };
1473 FindAvailableNode();
1474 if (CurSU)
1475 return CurSU;
1476
1477 // We query the topological order in the loop body, so make sure outstanding
1478 // updates are applied before entering it (we only enter the loop if there
1479 // are some interferences). If we make changes to the ordering, we exit
1480 // the loop.
1481
1482 // All candidates are delayed due to live physical reg dependencies.
1483 // Try backtracking, code duplication, or inserting cross class copies
1484 // to resolve it.
1485 for (SUnit *TrySU : Interferences) {
1486 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1487
1488 // Try unscheduling up to the point where it's safe to schedule
1489 // this node.
1490 SUnit *BtSU = nullptr;
1491 unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1492 for (unsigned Reg : LRegs) {
1493 if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1494 BtSU = LiveRegGens[Reg];
1495 LiveCycle = BtSU->getHeight();
1496 }
1497 }
1498 if (!WillCreateCycle(TrySU, BtSU)) {
1499 // BacktrackBottomUp mutates Interferences!
1500 BacktrackBottomUp(TrySU, BtSU);
1501
1502 // Force the current node to be scheduled before the node that
1503 // requires the physical reg dep.
1504 if (BtSU->isAvailable) {
1505 BtSU->isAvailable = false;
1506 if (!BtSU->isPending)
1507 AvailableQueue->remove(BtSU);
1508 }
1509 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1510 << ") to SU(" << TrySU->NodeNum << ")\n");
1511 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1512
1513 // If one or more successors has been unscheduled, then the current
1514 // node is no longer available.
1515 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1516 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1517 CurSU = AvailableQueue->pop();
1518 } else {
1519 LLVM_DEBUG(dbgs() << "TrySU available\n");
1520 // Available and in AvailableQueue
1521 AvailableQueue->remove(TrySU);
1522 CurSU = TrySU;
1523 }
1524 FindAvailableNode();
1525 // Interferences has been mutated. We must break.
1526 break;
1527 }
1528 }
1529
1530 if (!CurSU) {
1531 // Can't backtrack. If it's too expensive to copy the value, then try
1532 // duplicate the nodes that produces these "too expensive to copy"
1533 // values to break the dependency. In case even that doesn't work,
1534 // insert cross class copies.
1535 // If it's not too expensive, i.e. cost != -1, issue copies.
1536 SUnit *TrySU = Interferences[0];
1537 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1538 assert(LRegs.size() == 1 && "Can't handle this yet!");
1539 unsigned Reg = LRegs[0];
1540 SUnit *LRDef = LiveRegDefs[Reg];
1541 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
1542 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1543
1544 // If cross copy register class is the same as RC, then it must be possible
1545 // copy the value directly. Do not try duplicate the def.
1546 // If cross copy register class is not the same as RC, then it's possible to
1547 // copy the value but it require cross register class copies and it is
1548 // expensive.
1549 // If cross copy register class is null, then it's not possible to copy
1550 // the value at all.
1551 SUnit *NewDef = nullptr;
1552 if (DestRC != RC) {
1553 NewDef = CopyAndMoveSuccessors(LRDef);
1554 if (!DestRC && !NewDef)
1555 report_fatal_error("Can't handle live physical register dependency!");
1556 }
1557 if (!NewDef) {
1558 // Issue copies, these can be expensive cross register class copies.
1560 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1561 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1562 << " to SU #" << Copies.front()->NodeNum << "\n");
1563 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1564 NewDef = Copies.back();
1565 }
1566
1567 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1568 << " to SU #" << TrySU->NodeNum << "\n");
1569 LiveRegDefs[Reg] = NewDef;
1570 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1571 TrySU->isAvailable = false;
1572 CurSU = NewDef;
1573 }
1574 assert(CurSU && "Unable to resolve live physical register dependencies!");
1575 return CurSU;
1576}
1577
1578/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1579/// schedulers.
1580void ScheduleDAGRRList::ListScheduleBottomUp() {
1581 // Release any predecessors of the special Exit node.
1582 ReleasePredecessors(&ExitSU);
1583
1584 // Add root to Available queue.
1585 if (!SUnits.empty()) {
1586 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1587 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1588 RootSU->isAvailable = true;
1589 AvailableQueue->push(RootSU);
1590 }
1591
1592 // While Available queue is not empty, grab the node with the highest
1593 // priority. If it is not ready put it back. Schedule the node.
1594 Sequence.reserve(SUnits.size());
1595 while (!AvailableQueue->empty() || !Interferences.empty()) {
1596 LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1597 AvailableQueue->dump(this));
1598
1599 // Pick the best node to schedule taking all constraints into
1600 // consideration.
1601 SUnit *SU = PickNodeToScheduleBottomUp();
1602
1603 AdvancePastStalls(SU);
1604
1605 ScheduleNodeBottomUp(SU);
1606
1607 while (AvailableQueue->empty() && !PendingQueue.empty()) {
1608 // Advance the cycle to free resources. Skip ahead to the next ready SU.
1609 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1610 "MinAvailableCycle uninitialized");
1611 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1612 }
1613 }
1614
1615 // Reverse the order if it is bottom up.
1616 std::reverse(Sequence.begin(), Sequence.end());
1617
1618#ifndef NDEBUG
1619 VerifyScheduledSequence(/*isBottomUp=*/true);
1620#endif
1621}
1622
1623namespace {
1624
1625class RegReductionPQBase;
1626
1627struct queue_sort {
1628 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1629};
1630
1631#ifndef NDEBUG
1632template<class SF>
1633struct reverse_sort : public queue_sort {
1634 SF &SortFunc;
1635
1636 reverse_sort(SF &sf) : SortFunc(sf) {}
1637
1638 bool operator()(SUnit* left, SUnit* right) const {
1639 // reverse left/right rather than simply !SortFunc(left, right)
1640 // to expose different paths in the comparison logic.
1641 return SortFunc(right, left);
1642 }
1643};
1644#endif // NDEBUG
1645
1646/// bu_ls_rr_sort - Priority function for bottom up register pressure
1647// reduction scheduler.
1648struct bu_ls_rr_sort : public queue_sort {
1649 enum {
1650 IsBottomUp = true,
1651 HasReadyFilter = false
1652 };
1653
1654 RegReductionPQBase *SPQ;
1655
1656 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1657
1658 bool operator()(SUnit* left, SUnit* right) const;
1659};
1660
1661// src_ls_rr_sort - Priority function for source order scheduler.
1662struct src_ls_rr_sort : public queue_sort {
1663 enum {
1664 IsBottomUp = true,
1665 HasReadyFilter = false
1666 };
1667
1668 RegReductionPQBase *SPQ;
1669
1670 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1671
1672 bool operator()(SUnit* left, SUnit* right) const;
1673};
1674
1675// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1676struct hybrid_ls_rr_sort : public queue_sort {
1677 enum {
1678 IsBottomUp = true,
1679 HasReadyFilter = false
1680 };
1681
1682 RegReductionPQBase *SPQ;
1683
1684 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1685
1686 bool isReady(SUnit *SU, unsigned CurCycle) const;
1687
1688 bool operator()(SUnit* left, SUnit* right) const;
1689};
1690
1691// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1692// scheduler.
1693struct ilp_ls_rr_sort : public queue_sort {
1694 enum {
1695 IsBottomUp = true,
1696 HasReadyFilter = false
1697 };
1698
1699 RegReductionPQBase *SPQ;
1700
1701 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1702
1703 bool isReady(SUnit *SU, unsigned CurCycle) const;
1704
1705 bool operator()(SUnit* left, SUnit* right) const;
1706};
1707
1708class RegReductionPQBase : public SchedulingPriorityQueue {
1709protected:
1710 std::vector<SUnit *> Queue;
1711 unsigned CurQueueId = 0;
1712 bool TracksRegPressure;
1713 bool SrcOrder;
1714
1715 // SUnits - The SUnits for the current graph.
1716 std::vector<SUnit> *SUnits = nullptr;
1717
1718 MachineFunction &MF;
1719 const TargetInstrInfo *TII = nullptr;
1720 const TargetRegisterInfo *TRI = nullptr;
1721 const TargetLowering *TLI = nullptr;
1722 ScheduleDAGRRList *scheduleDAG = nullptr;
1723
1724 // SethiUllmanNumbers - The SethiUllman number for each node.
1725 std::vector<unsigned> SethiUllmanNumbers;
1726
1727 /// RegPressure - Tracking current reg pressure per register class.
1728 std::vector<unsigned> RegPressure;
1729
1730 /// RegLimit - Tracking the number of allocatable registers per register
1731 /// class.
1732 std::vector<unsigned> RegLimit;
1733
1734public:
1735 RegReductionPQBase(MachineFunction &mf,
1736 bool hasReadyFilter,
1737 bool tracksrp,
1738 bool srcorder,
1739 const TargetInstrInfo *tii,
1740 const TargetRegisterInfo *tri,
1741 const TargetLowering *tli)
1742 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1743 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1744 if (TracksRegPressure) {
1745 unsigned NumRC = TRI->getNumRegClasses();
1746 RegLimit.resize(NumRC);
1747 RegPressure.resize(NumRC);
1748 llvm::fill(RegLimit, 0);
1749 llvm::fill(RegPressure, 0);
1750 for (const TargetRegisterClass *RC : TRI->regclasses())
1751 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1752 }
1753 }
1754
1755 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1756 scheduleDAG = scheduleDag;
1757 }
1758
1759 ScheduleHazardRecognizer* getHazardRec() {
1760 return scheduleDAG->getHazardRec();
1761 }
1762
1763 void initNodes(std::vector<SUnit> &sunits) override;
1764
1765 void addNode(const SUnit *SU) override;
1766
1767 void updateNode(const SUnit *SU) override;
1768
1769 void releaseState() override {
1770 SUnits = nullptr;
1771 SethiUllmanNumbers.clear();
1772 llvm::fill(RegPressure, 0);
1773 }
1774
1775 unsigned getNodePriority(const SUnit *SU) const;
1776
1777 unsigned getNodeOrdering(const SUnit *SU) const {
1778 if (!SU->getNode()) return 0;
1779
1780 return SU->getNode()->getIROrder();
1781 }
1782
1783 bool empty() const override { return Queue.empty(); }
1784
1785 void push(SUnit *U) override {
1786 assert(!U->NodeQueueId && "Node in the queue already");
1787 U->NodeQueueId = ++CurQueueId;
1788 Queue.push_back(U);
1789 }
1790
1791 void remove(SUnit *SU) override {
1792 assert(!Queue.empty() && "Queue is empty!");
1793 assert(SU->NodeQueueId != 0 && "Not in queue!");
1794 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1795 if (I != std::prev(Queue.end()))
1796 std::swap(*I, Queue.back());
1797 Queue.pop_back();
1798 SU->NodeQueueId = 0;
1799 }
1800
1801 bool tracksRegPressure() const override { return TracksRegPressure; }
1802
1803 void dumpRegPressure() const;
1804
1805 bool HighRegPressure(const SUnit *SU) const;
1806
1807 bool MayReduceRegPressure(SUnit *SU) const;
1808
1809 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1810
1811 void scheduledNode(SUnit *SU) override;
1812
1813 void unscheduledNode(SUnit *SU) override;
1814
1815protected:
1816 bool canClobber(const SUnit *SU, const SUnit *Op);
1817 void AddPseudoTwoAddrDeps();
1818 void PrescheduleNodesWithMultipleUses();
1819 void CalculateSethiUllmanNumbers();
1820};
1821
1822template<class SF>
1823static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1824 unsigned BestIdx = 0;
1825 // Only compute the cost for the first 1000 items in the queue, to avoid
1826 // excessive compile-times for very large queues.
1827 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1828 I++)
1829 if (Picker(Q[BestIdx], Q[I]))
1830 BestIdx = I;
1831 SUnit *V = Q[BestIdx];
1832 if (BestIdx + 1 != Q.size())
1833 std::swap(Q[BestIdx], Q.back());
1834 Q.pop_back();
1835 return V;
1836}
1837
1838template<class SF>
1839SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1840#ifndef NDEBUG
1841 if (DAG->StressSched) {
1842 reverse_sort<SF> RPicker(Picker);
1843 return popFromQueueImpl(Q, RPicker);
1844 }
1845#endif
1846 (void)DAG;
1847 return popFromQueueImpl(Q, Picker);
1848}
1849
1850//===----------------------------------------------------------------------===//
1851// RegReductionPriorityQueue Definition
1852//===----------------------------------------------------------------------===//
1853//
1854// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1855// to reduce register pressure.
1856//
1857template<class SF>
1858class RegReductionPriorityQueue : public RegReductionPQBase {
1859 SF Picker;
1860
1861public:
1862 RegReductionPriorityQueue(MachineFunction &mf,
1863 bool tracksrp,
1864 bool srcorder,
1865 const TargetInstrInfo *tii,
1866 const TargetRegisterInfo *tri,
1867 const TargetLowering *tli)
1868 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1869 tii, tri, tli),
1870 Picker(this) {}
1871
1872 bool isBottomUp() const override { return SF::IsBottomUp; }
1873
1874 bool isReady(SUnit *U) const override {
1875 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1876 }
1877
1878 SUnit *pop() override {
1879 if (Queue.empty()) return nullptr;
1880
1881 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1882 V->NodeQueueId = 0;
1883 return V;
1884 }
1885
1886#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1887 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1888 // Emulate pop() without clobbering NodeQueueIds.
1889 std::vector<SUnit *> DumpQueue = Queue;
1890 SF DumpPicker = Picker;
1891 while (!DumpQueue.empty()) {
1892 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1893 dbgs() << "Height " << SU->getHeight() << ": ";
1894 DAG->dumpNode(*SU);
1895 }
1896 }
1897#endif
1898};
1899
1900using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1901using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1902using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1903using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1904
1905} // end anonymous namespace
1906
1907//===----------------------------------------------------------------------===//
1908// Static Node Priority for Register Pressure Reduction
1909//===----------------------------------------------------------------------===//
1910
1911// Check for special nodes that bypass scheduling heuristics.
1912// Currently this pushes TokenFactor nodes down, but may be used for other
1913// pseudo-ops as well.
1914//
1915// Return -1 to schedule right above left, 1 for left above right.
1916// Return 0 if no bias exists.
1917static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1918 bool LSchedLow = left->isScheduleLow;
1919 bool RSchedLow = right->isScheduleLow;
1920 if (LSchedLow != RSchedLow)
1921 return LSchedLow < RSchedLow ? 1 : -1;
1922 return 0;
1923}
1924
1925/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1926/// Smaller number is the higher priority.
1927static unsigned
1928CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1929 if (SUNumbers[SU->NodeNum] != 0)
1930 return SUNumbers[SU->NodeNum];
1931
1932 // Use WorkList to avoid stack overflow on excessively large IRs.
1933 struct WorkState {
1934 WorkState(const SUnit *SU) : SU(SU) {}
1935 const SUnit *SU;
1936 unsigned PredsProcessed = 0;
1937 };
1938
1940 WorkList.push_back(SU);
1941 while (!WorkList.empty()) {
1942 auto &Temp = WorkList.back();
1943 auto *TempSU = Temp.SU;
1944 bool AllPredsKnown = true;
1945 // Try to find a non-evaluated pred and push it into the processing stack.
1946 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1947 auto &Pred = TempSU->Preds[P];
1948 if (Pred.isCtrl()) continue; // ignore chain preds
1949 SUnit *PredSU = Pred.getSUnit();
1950 if (SUNumbers[PredSU->NodeNum] == 0) {
1951#ifndef NDEBUG
1952 // In debug mode, check that we don't have such element in the stack.
1953 for (auto It : WorkList)
1954 assert(It.SU != PredSU && "Trying to push an element twice?");
1955#endif
1956 // Next time start processing this one starting from the next pred.
1957 Temp.PredsProcessed = P + 1;
1958 WorkList.push_back(PredSU);
1959 AllPredsKnown = false;
1960 break;
1961 }
1962 }
1963
1964 if (!AllPredsKnown)
1965 continue;
1966
1967 // Once all preds are known, we can calculate the answer for this one.
1968 unsigned SethiUllmanNumber = 0;
1969 unsigned Extra = 0;
1970 for (const SDep &Pred : TempSU->Preds) {
1971 if (Pred.isCtrl()) continue; // ignore chain preds
1972 SUnit *PredSU = Pred.getSUnit();
1973 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1974 assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1975 if (PredSethiUllman > SethiUllmanNumber) {
1976 SethiUllmanNumber = PredSethiUllman;
1977 Extra = 0;
1978 } else if (PredSethiUllman == SethiUllmanNumber)
1979 ++Extra;
1980 }
1981
1982 SethiUllmanNumber += Extra;
1983 if (SethiUllmanNumber == 0)
1984 SethiUllmanNumber = 1;
1985 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1986 WorkList.pop_back();
1987 }
1988
1989 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1990 return SUNumbers[SU->NodeNum];
1991}
1992
1993/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1994/// scheduling units.
1995void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1996 SethiUllmanNumbers.assign(SUnits->size(), 0);
1997
1998 for (const SUnit &SU : *SUnits)
1999 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2000}
2001
2002void RegReductionPQBase::addNode(const SUnit *SU) {
2003 unsigned SUSize = SethiUllmanNumbers.size();
2004 if (SUnits->size() > SUSize)
2005 SethiUllmanNumbers.resize(SUSize*2, 0);
2006 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2007}
2008
2009void RegReductionPQBase::updateNode(const SUnit *SU) {
2010 SethiUllmanNumbers[SU->NodeNum] = 0;
2011 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2012}
2013
2014// Lower priority means schedule further down. For bottom-up scheduling, lower
2015// priority SUs are scheduled before higher priority SUs.
2016unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2017 assert(SU->NodeNum < SethiUllmanNumbers.size());
2018 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2020 // CopyToReg should be close to its uses to facilitate coalescing and
2021 // avoid spilling.
2022 return 0;
2023 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2024 Opc == TargetOpcode::SUBREG_TO_REG ||
2025 Opc == TargetOpcode::INSERT_SUBREG)
2026 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2027 // close to their uses to facilitate coalescing.
2028 return 0;
2029 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2030 // If SU does not have a register use, i.e. it doesn't produce a value
2031 // that would be consumed (e.g. store), then it terminates a chain of
2032 // computation. Give it a large SethiUllman number so it will be
2033 // scheduled right before its predecessors that it doesn't lengthen
2034 // their live ranges.
2035 return 0xffff;
2036 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2037 // If SU does not have a register def, schedule it close to its uses
2038 // because it does not lengthen any live ranges.
2039 return 0;
2040#if 1
2041 return SethiUllmanNumbers[SU->NodeNum];
2042#else
2043 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2044 if (SU->isCallOp) {
2045 // FIXME: This assumes all of the defs are used as call operands.
2046 int NP = (int)Priority - SU->getNode()->getNumValues();
2047 return (NP > 0) ? NP : 0;
2048 }
2049 return Priority;
2050#endif
2051}
2052
2053//===----------------------------------------------------------------------===//
2054// Register Pressure Tracking
2055//===----------------------------------------------------------------------===//
2056
2057#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2058LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2059 for (const TargetRegisterClass *RC : TRI->regclasses()) {
2060 unsigned Id = RC->getID();
2061 unsigned RP = RegPressure[Id];
2062 if (!RP) continue;
2063 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2064 << RegLimit[Id] << '\n');
2065 }
2066}
2067#endif
2068
2069bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2070 if (!TLI)
2071 return false;
2072
2073 for (const SDep &Pred : SU->Preds) {
2074 if (Pred.isCtrl())
2075 continue;
2076 SUnit *PredSU = Pred.getSUnit();
2077 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2078 // to cover the number of registers defined (they are all live).
2079 if (PredSU->NumRegDefsLeft == 0) {
2080 continue;
2081 }
2082 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2083 RegDefPos.IsValid(); RegDefPos.Advance()) {
2084 unsigned RCId, Cost;
2085 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2086
2087 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2088 return true;
2089 }
2090 }
2091 return false;
2092}
2093
2094bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2095 const SDNode *N = SU->getNode();
2096
2097 if (!N->isMachineOpcode() || !SU->NumSuccs)
2098 return false;
2099
2100 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2101 for (unsigned i = 0; i != NumDefs; ++i) {
2102 MVT VT = N->getSimpleValueType(i);
2103 if (!N->hasAnyUseOfValue(i))
2104 continue;
2105 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2106 if (RegPressure[RCId] >= RegLimit[RCId])
2107 return true;
2108 }
2109 return false;
2110}
2111
2112// Compute the register pressure contribution by this instruction by count up
2113// for uses that are not live and down for defs. Only count register classes
2114// that are already under high pressure. As a side effect, compute the number of
2115// uses of registers that are already live.
2116//
2117// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2118// so could probably be factored.
2119int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2120 LiveUses = 0;
2121 int PDiff = 0;
2122 for (const SDep &Pred : SU->Preds) {
2123 if (Pred.isCtrl())
2124 continue;
2125 SUnit *PredSU = Pred.getSUnit();
2126 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2127 // to cover the number of registers defined (they are all live).
2128 if (PredSU->NumRegDefsLeft == 0) {
2129 if (PredSU->getNode()->isMachineOpcode())
2130 ++LiveUses;
2131 continue;
2132 }
2133 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2134 RegDefPos.IsValid(); RegDefPos.Advance()) {
2135 MVT VT = RegDefPos.GetValue();
2136 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2137 if (RegPressure[RCId] >= RegLimit[RCId])
2138 ++PDiff;
2139 }
2140 }
2141 const SDNode *N = SU->getNode();
2142
2143 if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2144 return PDiff;
2145
2146 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2147 for (unsigned i = 0; i != NumDefs; ++i) {
2148 MVT VT = N->getSimpleValueType(i);
2149 if (!N->hasAnyUseOfValue(i))
2150 continue;
2151 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2152 if (RegPressure[RCId] >= RegLimit[RCId])
2153 --PDiff;
2154 }
2155 return PDiff;
2156}
2157
2158void RegReductionPQBase::scheduledNode(SUnit *SU) {
2159 if (!TracksRegPressure)
2160 return;
2161
2162 if (!SU->getNode())
2163 return;
2164
2165 for (const SDep &Pred : SU->Preds) {
2166 if (Pred.isCtrl())
2167 continue;
2168 SUnit *PredSU = Pred.getSUnit();
2169 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2170 // to cover the number of registers defined (they are all live).
2171 if (PredSU->NumRegDefsLeft == 0) {
2172 continue;
2173 }
2174 // FIXME: The ScheduleDAG currently loses information about which of a
2175 // node's values is consumed by each dependence. Consequently, if the node
2176 // defines multiple register classes, we don't know which to pressurize
2177 // here. Instead the following loop consumes the register defs in an
2178 // arbitrary order. At least it handles the common case of clustered loads
2179 // to the same class. For precise liveness, each SDep needs to indicate the
2180 // result number. But that tightly couples the ScheduleDAG with the
2181 // SelectionDAG making updates tricky. A simpler hack would be to attach a
2182 // value type or register class to SDep.
2183 //
2184 // The most important aspect of register tracking is balancing the increase
2185 // here with the reduction further below. Note that this SU may use multiple
2186 // defs in PredSU. The can't be determined here, but we've already
2187 // compensated by reducing NumRegDefsLeft in PredSU during
2188 // ScheduleDAGSDNodes::AddSchedEdges.
2189 --PredSU->NumRegDefsLeft;
2190 unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2191 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2192 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2193 if (SkipRegDefs)
2194 continue;
2195
2196 unsigned RCId, Cost;
2197 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2198 RegPressure[RCId] += Cost;
2199 break;
2200 }
2201 }
2202
2203 // We should have this assert, but there may be dead SDNodes that never
2204 // materialize as SUnits, so they don't appear to generate liveness.
2205 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2206 int SkipRegDefs = (int)SU->NumRegDefsLeft;
2207 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2208 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2209 if (SkipRegDefs > 0)
2210 continue;
2211 unsigned RCId, Cost;
2212 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2213 if (RegPressure[RCId] < Cost) {
2214 // Register pressure tracking is imprecise. This can happen. But we try
2215 // hard not to let it happen because it likely results in poor scheduling.
2216 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2217 << ") has too many regdefs\n");
2218 RegPressure[RCId] = 0;
2219 }
2220 else {
2221 RegPressure[RCId] -= Cost;
2222 }
2223 }
2224 LLVM_DEBUG(dumpRegPressure());
2225}
2226
2227void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2228 if (!TracksRegPressure)
2229 return;
2230
2231 const SDNode *N = SU->getNode();
2232 if (!N) return;
2233
2234 if (!N->isMachineOpcode()) {
2235 if (N->getOpcode() != ISD::CopyToReg)
2236 return;
2237 } else {
2238 unsigned Opc = N->getMachineOpcode();
2239 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2240 Opc == TargetOpcode::INSERT_SUBREG ||
2241 Opc == TargetOpcode::SUBREG_TO_REG ||
2242 Opc == TargetOpcode::REG_SEQUENCE ||
2243 Opc == TargetOpcode::IMPLICIT_DEF)
2244 return;
2245 }
2246
2247 for (const SDep &Pred : SU->Preds) {
2248 if (Pred.isCtrl())
2249 continue;
2250 SUnit *PredSU = Pred.getSUnit();
2251 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2252 // counts data deps.
2253 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2254 continue;
2255 const SDNode *PN = PredSU->getNode();
2256 if (!PN->isMachineOpcode()) {
2257 if (PN->getOpcode() == ISD::CopyFromReg) {
2258 MVT VT = PN->getSimpleValueType(0);
2259 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2260 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2261 }
2262 continue;
2263 }
2264 unsigned POpc = PN->getMachineOpcode();
2265 if (POpc == TargetOpcode::IMPLICIT_DEF)
2266 continue;
2267 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2268 POpc == TargetOpcode::INSERT_SUBREG ||
2269 POpc == TargetOpcode::SUBREG_TO_REG) {
2270 MVT VT = PN->getSimpleValueType(0);
2271 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2272 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2273 continue;
2274 }
2275 if (POpc == TargetOpcode::REG_SEQUENCE) {
2276 unsigned DstRCIdx = PN->getConstantOperandVal(0);
2277 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
2278 unsigned RCId = RC->getID();
2279 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2280 // here. Instead use the same constant as in GetCostForDef.
2282 continue;
2283 }
2284 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2285 for (unsigned i = 0; i != NumDefs; ++i) {
2286 MVT VT = PN->getSimpleValueType(i);
2287 if (!PN->hasAnyUseOfValue(i))
2288 continue;
2289 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2290 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2291 // Register pressure tracking is imprecise. This can happen.
2292 RegPressure[RCId] = 0;
2293 else
2294 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2295 }
2296 }
2297
2298 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2299 // may transfer data dependencies to CopyToReg.
2300 if (SU->NumSuccs && N->isMachineOpcode()) {
2301 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2302 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2303 MVT VT = N->getSimpleValueType(i);
2304 if (VT == MVT::Glue || VT == MVT::Other)
2305 continue;
2306 if (!N->hasAnyUseOfValue(i))
2307 continue;
2308 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2309 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2310 }
2311 }
2312
2313 LLVM_DEBUG(dumpRegPressure());
2314}
2315
2316//===----------------------------------------------------------------------===//
2317// Dynamic Node Priority for Register Pressure Reduction
2318//===----------------------------------------------------------------------===//
2319
2320/// closestSucc - Returns the scheduled cycle of the successor which is
2321/// closest to the current cycle.
2322static unsigned closestSucc(const SUnit *SU) {
2323 unsigned MaxHeight = 0;
2324 for (const SDep &Succ : SU->Succs) {
2325 if (Succ.isCtrl()) continue; // ignore chain succs
2326 unsigned Height = Succ.getSUnit()->getHeight();
2327 // If there are bunch of CopyToRegs stacked up, they should be considered
2328 // to be at the same position.
2329 if (Succ.getSUnit()->getNode() &&
2330 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2331 Height = closestSucc(Succ.getSUnit())+1;
2332 if (Height > MaxHeight)
2333 MaxHeight = Height;
2334 }
2335 return MaxHeight;
2336}
2337
2338/// calcMaxScratches - Returns an cost estimate of the worse case requirement
2339/// for scratch registers, i.e. number of data dependencies.
2340static unsigned calcMaxScratches(const SUnit *SU) {
2341 unsigned Scratches = 0;
2342 for (const SDep &Pred : SU->Preds) {
2343 if (Pred.isCtrl()) continue; // ignore chain preds
2344 Scratches++;
2345 }
2346 return Scratches;
2347}
2348
2349/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2350/// CopyFromReg from a virtual register.
2351static bool hasOnlyLiveInOpers(const SUnit *SU) {
2352 bool RetVal = false;
2353 for (const SDep &Pred : SU->Preds) {
2354 if (Pred.isCtrl()) continue;
2355 const SUnit *PredSU = Pred.getSUnit();
2356 if (PredSU->getNode() &&
2357 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2358 Register Reg =
2359 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2360 if (Reg.isVirtual()) {
2361 RetVal = true;
2362 continue;
2363 }
2364 }
2365 return false;
2366 }
2367 return RetVal;
2368}
2369
2370/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2371/// CopyToReg to a virtual register. This SU def is probably a liveout and
2372/// it has no other use. It should be scheduled closer to the terminator.
2373static bool hasOnlyLiveOutUses(const SUnit *SU) {
2374 bool RetVal = false;
2375 for (const SDep &Succ : SU->Succs) {
2376 if (Succ.isCtrl()) continue;
2377 const SUnit *SuccSU = Succ.getSUnit();
2378 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2379 Register Reg =
2380 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2381 if (Reg.isVirtual()) {
2382 RetVal = true;
2383 continue;
2384 }
2385 }
2386 return false;
2387 }
2388 return RetVal;
2389}
2390
2391// Set isVRegCycle for a node with only live in opers and live out uses. Also
2392// set isVRegCycle for its CopyFromReg operands.
2393//
2394// This is only relevant for single-block loops, in which case the VRegCycle
2395// node is likely an induction variable in which the operand and target virtual
2396// registers should be coalesced (e.g. pre/post increment values). Setting the
2397// isVRegCycle flag helps the scheduler prioritize other uses of the same
2398// CopyFromReg so that this node becomes the virtual register "kill". This
2399// avoids interference between the values live in and out of the block and
2400// eliminates a copy inside the loop.
2401static void initVRegCycle(SUnit *SU) {
2403 return;
2404
2405 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2406 return;
2407
2408 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2409
2410 SU->isVRegCycle = true;
2411
2412 for (const SDep &Pred : SU->Preds) {
2413 if (Pred.isCtrl()) continue;
2414 Pred.getSUnit()->isVRegCycle = true;
2415 }
2416}
2417
2418// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2419// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2420static void resetVRegCycle(SUnit *SU) {
2421 if (!SU->isVRegCycle)
2422 return;
2423
2424 for (const SDep &Pred : SU->Preds) {
2425 if (Pred.isCtrl()) continue; // ignore chain preds
2426 SUnit *PredSU = Pred.getSUnit();
2427 if (PredSU->isVRegCycle) {
2428 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2429 "VRegCycle def must be CopyFromReg");
2430 Pred.getSUnit()->isVRegCycle = false;
2431 }
2432 }
2433}
2434
2435// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2436// means a node that defines the VRegCycle has not been scheduled yet.
2437static bool hasVRegCycleUse(const SUnit *SU) {
2438 // If this SU also defines the VReg, don't hoist it as a "use".
2439 if (SU->isVRegCycle)
2440 return false;
2441
2442 for (const SDep &Pred : SU->Preds) {
2443 if (Pred.isCtrl()) continue; // ignore chain preds
2444 if (Pred.getSUnit()->isVRegCycle &&
2445 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2446 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2447 return true;
2448 }
2449 }
2450 return false;
2451}
2452
2453// Check for either a dependence (latency) or resource (hazard) stall.
2454//
2455// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2456static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2457 if ((int)SPQ->getCurCycle() < Height) return true;
2458 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2460 return true;
2461 return false;
2462}
2463
2464// Return -1 if left has higher priority, 1 if right has higher priority.
2465// Return 0 if latency-based priority is equivalent.
2466static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2467 RegReductionPQBase *SPQ) {
2468 // Scheduling an instruction that uses a VReg whose postincrement has not yet
2469 // been scheduled will induce a copy. Model this as an extra cycle of latency.
2470 int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2471 int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2472 int LHeight = (int)left->getHeight() + LPenalty;
2473 int RHeight = (int)right->getHeight() + RPenalty;
2474
2475 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2476 BUHasStall(left, LHeight, SPQ);
2477 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2478 BUHasStall(right, RHeight, SPQ);
2479
2480 // If scheduling one of the node will cause a pipeline stall, delay it.
2481 // If scheduling either one of the node will cause a pipeline stall, sort
2482 // them according to their height.
2483 if (LStall) {
2484 if (!RStall)
2485 return 1;
2486 if (LHeight != RHeight)
2487 return LHeight > RHeight ? 1 : -1;
2488 } else if (RStall)
2489 return -1;
2490
2491 // If either node is scheduling for latency, sort them by height/depth
2492 // and latency.
2493 if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2494 right->SchedulingPref == Sched::ILP)) {
2495 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2496 // is enabled, grouping instructions by cycle, then its height is already
2497 // covered so only its depth matters. We also reach this point if both stall
2498 // but have the same height.
2499 if (!SPQ->getHazardRec()->isEnabled()) {
2500 if (LHeight != RHeight)
2501 return LHeight > RHeight ? 1 : -1;
2502 }
2503 int LDepth = left->getDepth() - LPenalty;
2504 int RDepth = right->getDepth() - RPenalty;
2505 if (LDepth != RDepth) {
2506 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2507 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2508 << ") depth " << RDepth << "\n");
2509 return LDepth < RDepth ? 1 : -1;
2510 }
2511 if (left->Latency != right->Latency)
2512 return left->Latency > right->Latency ? 1 : -1;
2513 }
2514 return 0;
2515}
2516
2517static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2518 // Schedule physical register definitions close to their use. This is
2519 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2520 // long as shortening physreg live ranges is generally good, we can defer
2521 // creating a subtarget hook.
2523 bool LHasPhysReg = left->hasPhysRegDefs;
2524 bool RHasPhysReg = right->hasPhysRegDefs;
2525 if (LHasPhysReg != RHasPhysReg) {
2526 #ifndef NDEBUG
2527 static const char *const PhysRegMsg[] = { " has no physreg",
2528 " defines a physreg" };
2529 #endif
2530 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2531 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2532 << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2533 return LHasPhysReg < RHasPhysReg;
2534 }
2535 }
2536
2537 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
2538 unsigned LPriority = SPQ->getNodePriority(left);
2539 unsigned RPriority = SPQ->getNodePriority(right);
2540
2541 // Be really careful about hoisting call operands above previous calls.
2542 // Only allows it if it would reduce register pressure.
2543 if (left->isCall && right->isCallOp) {
2544 unsigned RNumVals = right->getNode()->getNumValues();
2545 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2546 }
2547 if (right->isCall && left->isCallOp) {
2548 unsigned LNumVals = left->getNode()->getNumValues();
2549 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2550 }
2551
2552 if (LPriority != RPriority)
2553 return LPriority > RPriority;
2554
2555 // One or both of the nodes are calls and their sethi-ullman numbers are the
2556 // same, then keep source order.
2557 if (left->isCall || right->isCall) {
2558 unsigned LOrder = SPQ->getNodeOrdering(left);
2559 unsigned ROrder = SPQ->getNodeOrdering(right);
2560
2561 // Prefer an ordering where the lower the non-zero order number, the higher
2562 // the preference.
2563 if ((LOrder || ROrder) && LOrder != ROrder)
2564 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2565 }
2566
2567 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2568 // e.g.
2569 // t1 = op t2, c1
2570 // t3 = op t4, c2
2571 //
2572 // and the following instructions are both ready.
2573 // t2 = op c3
2574 // t4 = op c4
2575 //
2576 // Then schedule t2 = op first.
2577 // i.e.
2578 // t4 = op c4
2579 // t2 = op c3
2580 // t1 = op t2, c1
2581 // t3 = op t4, c2
2582 //
2583 // This creates more short live intervals.
2584 unsigned LDist = closestSucc(left);
2585 unsigned RDist = closestSucc(right);
2586 if (LDist != RDist)
2587 return LDist < RDist;
2588
2589 // How many registers becomes live when the node is scheduled.
2590 unsigned LScratch = calcMaxScratches(left);
2591 unsigned RScratch = calcMaxScratches(right);
2592 if (LScratch != RScratch)
2593 return LScratch > RScratch;
2594
2595 // Comparing latency against a call makes little sense unless the node
2596 // is register pressure-neutral.
2597 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2598 return (left->NodeQueueId > right->NodeQueueId);
2599
2600 // Do not compare latencies when one or both of the nodes are calls.
2601 if (!DisableSchedCycles &&
2602 !(left->isCall || right->isCall)) {
2603 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2604 if (result != 0)
2605 return result > 0;
2606 }
2607 else {
2608 if (left->getHeight() != right->getHeight())
2609 return left->getHeight() > right->getHeight();
2610
2611 if (left->getDepth() != right->getDepth())
2612 return left->getDepth() < right->getDepth();
2613 }
2614
2615 assert(left->NodeQueueId && right->NodeQueueId &&
2616 "NodeQueueId cannot be zero");
2617 return (left->NodeQueueId > right->NodeQueueId);
2618}
2619
2620// Bottom up
2621bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2622 if (int res = checkSpecialNodes(left, right))
2623 return res > 0;
2624
2625 return BURRSort(left, right, SPQ);
2626}
2627
2628// Source order, otherwise bottom up.
2629bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2630 if (int res = checkSpecialNodes(left, right))
2631 return res > 0;
2632
2633 unsigned LOrder = SPQ->getNodeOrdering(left);
2634 unsigned ROrder = SPQ->getNodeOrdering(right);
2635
2636 // Prefer an ordering where the lower the non-zero order number, the higher
2637 // the preference.
2638 if ((LOrder || ROrder) && LOrder != ROrder)
2639 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2640
2641 return BURRSort(left, right, SPQ);
2642}
2643
2644// If the time between now and when the instruction will be ready can cover
2645// the spill code, then avoid adding it to the ready queue. This gives long
2646// stalls highest priority and allows hoisting across calls. It should also
2647// speed up processing the available queue.
2648bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2649 static const unsigned ReadyDelay = 3;
2650
2651 if (SPQ->MayReduceRegPressure(SU)) return true;
2652
2653 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2654
2655 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2657 return false;
2658
2659 return true;
2660}
2661
2662// Return true if right should be scheduled with higher priority than left.
2663bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2664 if (int res = checkSpecialNodes(left, right))
2665 return res > 0;
2666
2667 if (left->isCall || right->isCall)
2668 // No way to compute latency of calls.
2669 return BURRSort(left, right, SPQ);
2670
2671 bool LHigh = SPQ->HighRegPressure(left);
2672 bool RHigh = SPQ->HighRegPressure(right);
2673 // Avoid causing spills. If register pressure is high, schedule for
2674 // register pressure reduction.
2675 if (LHigh && !RHigh) {
2676 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2677 << right->NodeNum << ")\n");
2678 return true;
2679 }
2680 else if (!LHigh && RHigh) {
2681 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2682 << left->NodeNum << ")\n");
2683 return false;
2684 }
2685 if (!LHigh && !RHigh) {
2686 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2687 if (result != 0)
2688 return result > 0;
2689 }
2690 return BURRSort(left, right, SPQ);
2691}
2692
2693// Schedule as many instructions in each cycle as possible. So don't make an
2694// instruction available unless it is ready in the current cycle.
2695bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2696 if (SU->getHeight() > CurCycle) return false;
2697
2698 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2700 return false;
2701
2702 return true;
2703}
2704
2705static bool canEnableCoalescing(SUnit *SU) {
2706 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2708 // CopyToReg should be close to its uses to facilitate coalescing and
2709 // avoid spilling.
2710 return true;
2711
2712 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2713 Opc == TargetOpcode::SUBREG_TO_REG ||
2714 Opc == TargetOpcode::INSERT_SUBREG)
2715 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2716 // close to their uses to facilitate coalescing.
2717 return true;
2718
2719 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2720 // If SU does not have a register def, schedule it close to its uses
2721 // because it does not lengthen any live ranges.
2722 return true;
2723
2724 return false;
2725}
2726
2727// list-ilp is currently an experimental scheduler that allows various
2728// heuristics to be enabled prior to the normal register reduction logic.
2729bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2730 if (int res = checkSpecialNodes(left, right))
2731 return res > 0;
2732
2733 if (left->isCall || right->isCall)
2734 // No way to compute latency of calls.
2735 return BURRSort(left, right, SPQ);
2736
2737 unsigned LLiveUses = 0, RLiveUses = 0;
2738 int LPDiff = 0, RPDiff = 0;
2740 LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2741 RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2742 }
2743 if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2744 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2745 << "): " << LPDiff << " != SU(" << right->NodeNum
2746 << "): " << RPDiff << "\n");
2747 return LPDiff > RPDiff;
2748 }
2749
2750 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2751 bool LReduce = canEnableCoalescing(left);
2752 bool RReduce = canEnableCoalescing(right);
2753 if (LReduce && !RReduce) return false;
2754 if (RReduce && !LReduce) return true;
2755 }
2756
2757 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2758 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2759 << " != SU(" << right->NodeNum << "): " << RLiveUses
2760 << "\n");
2761 return LLiveUses < RLiveUses;
2762 }
2763
2764 if (!DisableSchedStalls) {
2765 bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2766 bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2767 if (LStall != RStall)
2768 return left->getHeight() > right->getHeight();
2769 }
2770
2772 int spread = (int)left->getDepth() - (int)right->getDepth();
2773 if (std::abs(spread) > MaxReorderWindow) {
2774 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2775 << left->getDepth() << " != SU(" << right->NodeNum
2776 << "): " << right->getDepth() << "\n");
2777 return left->getDepth() < right->getDepth();
2778 }
2779 }
2780
2781 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2782 int spread = (int)left->getHeight() - (int)right->getHeight();
2783 if (std::abs(spread) > MaxReorderWindow)
2784 return left->getHeight() > right->getHeight();
2785 }
2786
2787 return BURRSort(left, right, SPQ);
2788}
2789
2790void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2791 SUnits = &sunits;
2792 // Add pseudo dependency edges for two-address nodes.
2793 if (!Disable2AddrHack)
2794 AddPseudoTwoAddrDeps();
2795 // Reroute edges to nodes with multiple uses.
2796 if (!TracksRegPressure && !SrcOrder)
2797 PrescheduleNodesWithMultipleUses();
2798 // Calculate node priorities.
2799 CalculateSethiUllmanNumbers();
2800
2801 // For single block loops, mark nodes that look like canonical IV increments.
2802 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2803 for (SUnit &SU : sunits)
2804 initVRegCycle(&SU);
2805}
2806
2807//===----------------------------------------------------------------------===//
2808// Preschedule for Register Pressure
2809//===----------------------------------------------------------------------===//
2810
2811bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2812 if (SU->isTwoAddress) {
2813 unsigned Opc = SU->getNode()->getMachineOpcode();
2814 const MCInstrDesc &MCID = TII->get(Opc);
2815 unsigned NumRes = MCID.getNumDefs();
2816 unsigned NumOps = MCID.getNumOperands() - NumRes;
2817 for (unsigned i = 0; i != NumOps; ++i) {
2818 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2819 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2820 if (DU->getNodeId() != -1 &&
2821 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2822 return true;
2823 }
2824 }
2825 }
2826 return false;
2827}
2828
2829/// canClobberReachingPhysRegUse - True if SU would clobber one of it's
2830/// successor's explicit physregs whose definition can reach DepSU.
2831/// i.e. DepSU should not be scheduled above SU.
2832static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2833 ScheduleDAGRRList *scheduleDAG,
2834 const TargetInstrInfo *TII,
2835 const TargetRegisterInfo *TRI) {
2836 ArrayRef<MCPhysReg> ImpDefs =
2837 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
2838 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2839 if (ImpDefs.empty() && !RegMask)
2840 return false;
2841
2842 for (const SDep &Succ : SU->Succs) {
2843 SUnit *SuccSU = Succ.getSUnit();
2844 for (const SDep &SuccPred : SuccSU->Preds) {
2845 if (!SuccPred.isAssignedRegDep())
2846 continue;
2847
2848 if (RegMask &&
2849 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2850 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2851 return true;
2852
2853 for (MCPhysReg ImpDef : ImpDefs) {
2854 // Return true if SU clobbers this physical register use and the
2855 // definition of the register reaches from DepSU. IsReachable queries
2856 // a topological forward sort of the DAG (following the successors).
2857 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
2858 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2859 return true;
2860 }
2861 }
2862 }
2863 return false;
2864}
2865
2866/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2867/// physical register defs.
2868static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2869 const TargetInstrInfo *TII,
2870 const TargetRegisterInfo *TRI) {
2871 SDNode *N = SuccSU->getNode();
2872 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2873 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
2874 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2875 for (const SDNode *SUNode = SU->getNode(); SUNode;
2876 SUNode = SUNode->getGluedNode()) {
2877 if (!SUNode->isMachineOpcode())
2878 continue;
2879 ArrayRef<MCPhysReg> SUImpDefs =
2880 TII->get(SUNode->getMachineOpcode()).implicit_defs();
2881 const uint32_t *SURegMask = getNodeRegMask(SUNode);
2882 if (SUImpDefs.empty() && !SURegMask)
2883 continue;
2884 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2885 MVT VT = N->getSimpleValueType(i);
2886 if (VT == MVT::Glue || VT == MVT::Other)
2887 continue;
2888 if (!N->hasAnyUseOfValue(i))
2889 continue;
2890 MCPhysReg Reg = ImpDefs[i - NumDefs];
2891 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2892 return true;
2893 for (MCPhysReg SUReg : SUImpDefs) {
2894 if (TRI->regsOverlap(Reg, SUReg))
2895 return true;
2896 }
2897 }
2898 }
2899 return false;
2900}
2901
2902/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2903/// are not handled well by the general register pressure reduction
2904/// heuristics. When presented with code like this:
2905///
2906/// N
2907/// / |
2908/// / |
2909/// U store
2910/// |
2911/// ...
2912///
2913/// the heuristics tend to push the store up, but since the
2914/// operand of the store has another use (U), this would increase
2915/// the length of that other use (the U->N edge).
2916///
2917/// This function transforms code like the above to route U's
2918/// dependence through the store when possible, like this:
2919///
2920/// N
2921/// ||
2922/// ||
2923/// store
2924/// |
2925/// U
2926/// |
2927/// ...
2928///
2929/// This results in the store being scheduled immediately
2930/// after N, which shortens the U->N live range, reducing
2931/// register pressure.
2932void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2933 // Visit all the nodes in topological order, working top-down.
2934 for (SUnit &SU : *SUnits) {
2935 // For now, only look at nodes with no data successors, such as stores.
2936 // These are especially important, due to the heuristics in
2937 // getNodePriority for nodes with no data successors.
2938 if (SU.NumSuccs != 0)
2939 continue;
2940 // For now, only look at nodes with exactly one data predecessor.
2941 if (SU.NumPreds != 1)
2942 continue;
2943 // Avoid prescheduling copies to virtual registers, which don't behave
2944 // like other nodes from the perspective of scheduling heuristics.
2945 if (SDNode *N = SU.getNode())
2946 if (N->getOpcode() == ISD::CopyToReg &&
2947 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
2948 continue;
2949
2950 SDNode *PredFrameSetup = nullptr;
2951 for (const SDep &Pred : SU.Preds)
2952 if (Pred.isCtrl() && Pred.getSUnit()) {
2953 // Find the predecessor which is not data dependence.
2954 SDNode *PredND = Pred.getSUnit()->getNode();
2955
2956 // If PredND is FrameSetup, we should not pre-scheduled the node,
2957 // or else, when bottom up scheduling, ADJCALLSTACKDOWN and
2958 // ADJCALLSTACKUP may hold CallResource too long and make other
2959 // calls can't be scheduled. If there's no other available node
2960 // to schedule, the schedular will try to rename the register by
2961 // creating copy to avoid the conflict which will fail because
2962 // CallResource is not a real physical register.
2963 if (PredND && PredND->isMachineOpcode() &&
2964 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2965 PredFrameSetup = PredND;
2966 break;
2967 }
2968 }
2969 // Skip the node has FrameSetup parent.
2970 if (PredFrameSetup != nullptr)
2971 continue;
2972
2973 // Locate the single data predecessor.
2974 SUnit *PredSU = nullptr;
2975 for (const SDep &Pred : SU.Preds)
2976 if (!Pred.isCtrl()) {
2977 PredSU = Pred.getSUnit();
2978 break;
2979 }
2980 assert(PredSU);
2981
2982 // Don't rewrite edges that carry physregs, because that requires additional
2983 // support infrastructure.
2984 if (PredSU->hasPhysRegDefs)
2985 continue;
2986 // Short-circuit the case where SU is PredSU's only data successor.
2987 if (PredSU->NumSuccs == 1)
2988 continue;
2989 // Avoid prescheduling to copies from virtual registers, which don't behave
2990 // like other nodes from the perspective of scheduling heuristics.
2991 if (SDNode *N = SU.getNode())
2992 if (N->getOpcode() == ISD::CopyFromReg &&
2993 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
2994 continue;
2995
2996 // Perform checks on the successors of PredSU.
2997 for (const SDep &PredSucc : PredSU->Succs) {
2998 SUnit *PredSuccSU = PredSucc.getSUnit();
2999 if (PredSuccSU == &SU) continue;
3000 // If PredSU has another successor with no data successors, for
3001 // now don't attempt to choose either over the other.
3002 if (PredSuccSU->NumSuccs == 0)
3003 goto outer_loop_continue;
3004 // Don't break physical register dependencies.
3005 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3006 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3007 goto outer_loop_continue;
3008 // Don't introduce graph cycles.
3009 if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3010 goto outer_loop_continue;
3011 }
3012
3013 // Ok, the transformation is safe and the heuristics suggest it is
3014 // profitable. Update the graph.
3015 LLVM_DEBUG(
3016 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3017 << PredSU->NodeNum
3018 << " to guide scheduling in the presence of multiple uses\n");
3019 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3020 SDep Edge = PredSU->Succs[i];
3021 assert(!Edge.isAssignedRegDep());
3022 SUnit *SuccSU = Edge.getSUnit();
3023 if (SuccSU != &SU) {
3024 Edge.setSUnit(PredSU);
3025 scheduleDAG->RemovePred(SuccSU, Edge);
3026 scheduleDAG->AddPredQueued(&SU, Edge);
3027 Edge.setSUnit(&SU);
3028 scheduleDAG->AddPredQueued(SuccSU, Edge);
3029 --i;
3030 }
3031 }
3032 outer_loop_continue:;
3033 }
3034}
3035
3036/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3037/// it as a def&use operand. Add a pseudo control edge from it to the other
3038/// node (if it won't create a cycle) so the two-address one will be scheduled
3039/// first (lower in the schedule). If both nodes are two-address, favor the
3040/// one that has a CopyToReg use (more likely to be a loop induction update).
3041/// If both are two-address, but one is commutable while the other is not
3042/// commutable, favor the one that's not commutable.
3043void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3044 for (SUnit &SU : *SUnits) {
3045 if (!SU.isTwoAddress)
3046 continue;
3047
3048 SDNode *Node = SU.getNode();
3049 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3050 continue;
3051
3052 bool isLiveOut = hasOnlyLiveOutUses(&SU);
3053 unsigned Opc = Node->getMachineOpcode();
3054 const MCInstrDesc &MCID = TII->get(Opc);
3055 unsigned NumRes = MCID.getNumDefs();
3056 unsigned NumOps = MCID.getNumOperands() - NumRes;
3057 for (unsigned j = 0; j != NumOps; ++j) {
3058 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3059 continue;
3060 SDNode *DU = SU.getNode()->getOperand(j).getNode();
3061 if (DU->getNodeId() == -1)
3062 continue;
3063 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3064 if (!DUSU)
3065 continue;
3066 for (const SDep &Succ : DUSU->Succs) {
3067 if (Succ.isCtrl())
3068 continue;
3069 SUnit *SuccSU = Succ.getSUnit();
3070 if (SuccSU == &SU)
3071 continue;
3072 // Be conservative. Ignore if nodes aren't at roughly the same
3073 // depth and height.
3074 if (SuccSU->getHeight() < SU.getHeight() &&
3075 (SU.getHeight() - SuccSU->getHeight()) > 1)
3076 continue;
3077 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3078 // constrains whatever is using the copy, instead of the copy
3079 // itself. In the case that the copy is coalesced, this
3080 // preserves the intent of the pseudo two-address heurietics.
3081 while (SuccSU->Succs.size() == 1 &&
3082 SuccSU->getNode()->isMachineOpcode() &&
3083 SuccSU->getNode()->getMachineOpcode() ==
3084 TargetOpcode::COPY_TO_REGCLASS)
3085 SuccSU = SuccSU->Succs.front().getSUnit();
3086 // Don't constrain non-instruction nodes.
3087 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3088 continue;
3089 // Don't constrain nodes with physical register defs if the
3090 // predecessor can clobber them.
3091 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3092 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3093 continue;
3094 }
3095 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3096 // these may be coalesced away. We want them close to their uses.
3097 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3098 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3099 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3100 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3101 continue;
3102 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3103 (!canClobber(SuccSU, DUSU) ||
3104 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3105 (!SU.isCommutable && SuccSU->isCommutable)) &&
3106 !scheduleDAG->IsReachable(SuccSU, &SU)) {
3108 << " Adding a pseudo-two-addr edge from SU #"
3109 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3110 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3111 }
3112 }
3113 }
3114 }
3115}
3116
3117//===----------------------------------------------------------------------===//
3118// Public Constructor Functions
3119//===----------------------------------------------------------------------===//
3120
3122 CodeGenOptLevel OptLevel) {
3123 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3124 const TargetInstrInfo *TII = STI.getInstrInfo();
3125 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3126
3127 BURegReductionPriorityQueue *PQ =
3128 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3129 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3130 PQ->setScheduleDAG(SD);
3131 return SD;
3132}
3133
3136 CodeGenOptLevel OptLevel) {
3137 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3138 const TargetInstrInfo *TII = STI.getInstrInfo();
3139 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3140
3141 SrcRegReductionPriorityQueue *PQ =
3142 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3143 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3144 PQ->setScheduleDAG(SD);
3145 return SD;
3146}
3147
3150 CodeGenOptLevel OptLevel) {
3151 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3152 const TargetInstrInfo *TII = STI.getInstrInfo();
3153 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3154 const TargetLowering *TLI = IS->TLI;
3155
3156 HybridBURRPriorityQueue *PQ =
3157 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3158
3159 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3160 PQ->setScheduleDAG(SD);
3161 return SD;
3162}
3163
3165 CodeGenOptLevel OptLevel) {
3166 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3167 const TargetInstrInfo *TII = STI.getInstrInfo();
3168 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3169 const TargetLowering *TLI = IS->TLI;
3170
3171 ILPBURRPriorityQueue *PQ =
3172 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3173 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3174 PQ->setScheduleDAG(SD);
3175 return SD;
3176}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file defines the DenseMap class.
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define P(N)
SI Lower i1 Copies
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
static bool CheckForLiveRegDef(SUnit *SU, MCRegister Reg, std::vector< SUnit * > &LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs, const TargetRegisterInfo *TRI, const SDNode *Node=nullptr)
CheckForLiveRegDef - Return true and update live register vector if the specified register def of the...
static bool canEnableCoalescing(SUnit *SU)
static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler)
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
static cl::opt< bool > DisableSchedStalls("disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp"))
static bool hasOnlyLiveInOpers(const SUnit *SU)
hasOnlyLiveInOpers - Return true if SU has only value predecessors that are CopyFromReg from a virtua...
static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII)
IsChainDependent - Test if Outer is reachable from Inner through chain dependencies.
static bool hasOnlyLiveOutUses(const SUnit *SU)
hasOnlyLiveOutUses - Return true if SU has only value successors that are CopyToReg to a virtual regi...
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler)
static void resetVRegCycle(SUnit *SU)
static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler)
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberReachingPhysRegUse - True if SU would clobber one of it's successor's explicit physregs who...
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberPhysRegDefs - True if SU would clobber one of SuccSU's physical register defs.
static cl::opt< unsigned > AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle when no target itinerary exists."))
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, unsigned &RegClass, unsigned &Cost, const MachineFunction &MF)
GetCostForDef - Looks up the register class and cost for a given definition.
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
static cl::opt< bool > DisableSchedRegPressure("disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp"))
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
static void initVRegCycle(SUnit *SU)
static constexpr unsigned RegSequenceCost
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
static bool isOperandOf(const SUnit *SU, SDNode *N)
static cl::opt< bool > DisableSchedVRegCycle("disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks"))
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static bool hasVRegCycleUse(const SUnit *SU)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit * > LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask,...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getIROrder() const
Return the node ordering.
void setNodeId(int Id)
Set unique node id.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
void setSUnit(SUnit *SU)
Register getReg() const
Returns the register associated with this edge.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
LLVM_ABI void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NumSuccs
unsigned NumPreds
unsigned NodeQueueId
Queue id of node.
unsigned NodeNum
Entry # of node in the node vector.
unsigned NumSuccsLeft
bool hasPhysRegClobbers
Has any physreg defs, used or not.
bool isCallOp
Is a function call operand.
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
LLVM_ABI void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
unsigned short NumRegDefsLeft
bool isPending
True once pending.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
bool isAvailable
True once available.
bool isScheduleLow
True if preferable to schedule low.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
Sched::Preference SchedulingPref
Scheduling preference.
const TargetRegisterClass * CopySrcRC
SDNode * getNode() const
Returns the representative SDNode for this SUnit.
bool isTwoAddress
Is a two-address instruction.
bool isCommutable
Is a commutable instruction.
bool isVRegCycle
May use and def the same vreg.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
RegDefIter - In place iteration over the values defined by an SUnit.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
virtual void dumpNode(const SUnit &SU) const =0
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
This interface is used to plug different priorities computation algorithms into the list scheduler.
void setCurCycle(unsigned Cycle)
virtual void remove(SUnit *SU)=0
virtual void releaseState()=0
virtual SUnit * pop()=0
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
virtual bool tracksRegPressure() const
virtual void dump(ScheduleDAG *) const
virtual void initNodes(std::vector< SUnit > &SUnits)=0
virtual bool empty() const =0
virtual void unscheduledNode(SUnit *)
virtual void addNode(const SUnit *SU)=0
virtual void updateNode(const SUnit *SU)=0
virtual void push(SUnit *U)=0
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetLowering * TLI
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the 'representative' register class for the specified value type.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
@ ANNOTATION_LABEL
ANNOTATION_LABEL - Represents a mid basic block label used by annotations.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ LIFETIME_START
This corresponds to the llvm.lifetime.
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INLINEASM
INLINEASM - Represents an inline asm block.
initializer< Ty > init(const Ty &Val)
constexpr double e
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
bool empty() const
Definition BasicBlock.h:101
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
LLVM_ABI ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler.
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
Op::Description Desc
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createSourceListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N